mirror of
https://github.com/ceph/ceph
synced 2025-02-16 07:17:21 +00:00
Merge pull request #26768 from sebastian-philipp/upstream-pull-26684
mgr/orchestrator: device lights Reviewed-by: Ernesto Puerta <epuertat@redhat.com> Reviewed-by: Sage Weil <sage@redhat.com> Reviewed-by: Volker Theile <vtheile@suse.com>
This commit is contained in:
commit
3feda32916
@ -182,10 +182,19 @@ Example::
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
::
|
||||
|
||||
ceph orchestrator device ident-on <host> <devname>
|
||||
ceph orchestrator device ident-off <host> <devname>
|
||||
ceph orchestrator device fault-on <host> <devname>
|
||||
ceph orchestrator device fault-off <host> <devname>
|
||||
ceph orchestrator device ident-on <dev_id>
|
||||
ceph orchestrator device ident-on <dev_name> <host>
|
||||
ceph orchestrator device fault-on <dev_id>
|
||||
ceph orchestrator device fault-on <dev_name> <host>
|
||||
|
||||
ceph orchestrator device ident-off <dev_id> [--force=true]
|
||||
ceph orchestrator device ident-off <dev_name> <host> [--force=true]
|
||||
ceph orchestrator device fault-off <dev_id> [--force=true]
|
||||
ceph orchestrator device fault-off <dev_name> <host> [--force=true]
|
||||
|
||||
where ``dev_id`` is the device id as listed in ``osd metadata``,
|
||||
``dev_name`` is the name of the device on the system and ``host`` is the host as
|
||||
returned by ``orchestrator host ls``
|
||||
|
||||
ceph orchestrator osd ident-on {primary,journal,db,wal,all} <osd-id>
|
||||
ceph orchestrator osd ident-off {primary,journal,db,wal,all} <osd-id>
|
||||
|
@ -260,6 +260,9 @@ OSD management
|
||||
|
||||
.. py:currentmodule:: orchestrator
|
||||
|
||||
.. automethod:: Orchestrator.blink_device_light
|
||||
.. autoclass:: DeviceLightLoc
|
||||
|
||||
.. _orchestrator-osd-replace:
|
||||
|
||||
OSD Replacement
|
||||
|
@ -8,8 +8,8 @@ tasks:
|
||||
log-whitelist:
|
||||
- overall HEALTH_
|
||||
- \(MGR_DOWN\)
|
||||
- \(MGR_INSIGHTS_WARNING\)
|
||||
- \(insights_health_check
|
||||
- \(DEVICE_IDENT_ON\)
|
||||
- \(DEVICE_FAULT_ON\)
|
||||
- \(PG_
|
||||
- replacing it with standby
|
||||
- No standby daemons available
|
||||
|
@ -14,8 +14,11 @@ log = logging.getLogger(__name__)
|
||||
class TestOrchestratorCli(MgrTestCase):
|
||||
MGRS_REQUIRED = 1
|
||||
|
||||
def _cmd(self, module, *args):
|
||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args)
|
||||
|
||||
def _orch_cmd(self, *args):
|
||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
|
||||
return self._cmd("orchestrator", *args)
|
||||
|
||||
def _progress_cmd(self, *args):
|
||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args)
|
||||
@ -93,6 +96,30 @@ class TestOrchestratorCli(MgrTestCase):
|
||||
with self.assertRaises(CommandFailedError):
|
||||
self._orch_cmd("osd", "create", "notfound:device")
|
||||
|
||||
def test_blink_device_light(self):
    """Switch the ident and the fault light of one device on and off again,
    checking ``device ls-lights`` and the matching health check each time."""
    def _ls_lights(what):
        # Parse the JSON printed by ``device ls-lights`` and pick the list
        # stored under ``what`` ("ident" or "fault").
        return json.loads(self._cmd("device", "ls-lights"))[what]

    metadata = json.loads(self._cmd("osd", "metadata"))
    dev_name_ids = [osd["device_ids"] for osd in metadata]
    # Keep only entries that split into exactly two parts on '=' and use the
    # second part of the first such entry as the device id under test.
    _, dev_id = [d.split('=') for d in dev_name_ids if len(d.split('=')) == 2][0]

    for t in ["ident", "fault"]:
        # The light must be reported as off before it is switched on.
        self.assertNotIn(dev_id, _ls_lights(t))
        self._cmd("device", "light", "on", dev_id, t)
        self.assertIn(dev_id, _ls_lights(t))

        # Name of the health check expected for this light type.
        health = {
            'ident': 'DEVICE_IDENT_ON',
            'fault': 'DEVICE_FAULT_ON',
        }[t]
        self.wait_for_health(health, 30)

        self._cmd("device", "light", "off", dev_id, t)
        self.assertNotIn(dev_id, _ls_lights(t))

    self.wait_for_health_clear(30)
|
||||
|
||||
def test_mds_add(self):
|
||||
self._orch_cmd("mds", "add", "service_name")
|
||||
|
||||
|
@ -4,17 +4,15 @@ ceph-mgr orchestrator interface
|
||||
|
||||
Please see the ceph-mgr module developer's guide for more information.
|
||||
"""
|
||||
import copy
|
||||
import sys
|
||||
import time
|
||||
import fnmatch
|
||||
from collections import namedtuple
|
||||
from functools import wraps
|
||||
import uuid
|
||||
import string
|
||||
import random
|
||||
import datetime
|
||||
|
||||
import six
|
||||
import copy
|
||||
|
||||
from mgr_module import MgrModule, PersistentStoreDict
|
||||
from mgr_util import format_bytes
|
||||
@ -449,6 +447,17 @@ class Orchestrator(object):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def blink_device_light(self, ident_fault, on, locations):
    # type: (str, bool, List[DeviceLightLoc]) -> WriteCompletion
    """
    Instructs the orchestrator to enable or disable either the ident or the fault LED.

    This base implementation only raises :exc:`NotImplementedError`.

    :param ident_fault: either ``"ident"`` or ``"fault"``
    :param on: ``True`` = on, ``False`` = off.
    :param locations: See :class:`orchestrator.DeviceLightLoc`
    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()
|
||||
|
||||
def update_mgrs(self, num, hosts):
|
||||
# type: (int, List[str]) -> WriteCompletion
|
||||
"""
|
||||
@ -947,6 +956,19 @@ class InventoryNode(object):
|
||||
return [cls(item[0], devs(item[1].data)) for item in hosts]
|
||||
|
||||
|
||||
class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev'])):
    """
    Describes a specific device on a specific host. Used for enabling or disabling LEDs
    on devices.

    Fields:

    ``host``: hostname as in :func:`orchestrator.Orchestrator.get_hosts`

    ``dev``: described here as the device_id, e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
       See ``ceph osd metadata | jq '.[].device_ids'``
       NOTE(review): confirm whether callers pass a device id or a device
       name in this field.
    """
    # No per-instance attributes are added on top of the namedtuple fields.
    __slots__ = ()
|
||||
|
||||
|
||||
def _mk_orch_methods(cls):
|
||||
# Needs to be defined outside of for.
|
||||
# Otherwise meth is always bound to last key
|
||||
|
@ -1,14 +1,14 @@
|
||||
import errno
|
||||
import json
|
||||
from functools import wraps
|
||||
|
||||
from prettytable import PrettyTable
|
||||
|
||||
try:
|
||||
from typing import Dict, List
|
||||
from typing import List, Set, Optional
|
||||
except ImportError:
|
||||
pass # just for type checking.
|
||||
|
||||
from functools import wraps
|
||||
|
||||
from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \
|
||||
DeviceSelection
|
||||
@ -46,6 +46,120 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
|
||||
{'name': 'orchestrator'}
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Initialise the module, then restore the stored light state and
    publish the matching health checks."""
    super(OrchestratorCli, self).__init__(*args, **kwargs)
    # Device ids whose ident / fault light is currently recorded as on.
    self.ident = set()  # type: Set[str]
    self.fault = set()  # type: Set[str]
    # _load() may replace both sets, so it has to run before the health
    # checks are derived from them.
    self._load()
    self._refresh_health()
|
||||
|
||||
def _load(self):
    # type: () -> None
    """Restore ``self.ident`` and ``self.fault`` from the ``active_devices``
    store key, if that key holds a value."""
    active = self.get_store('active_devices')
    if active:
        # The stored value is a JSON object; a missing 'ident' or 'fault'
        # key yields an empty set.
        decoded = json.loads(active)
        self.ident = set(decoded.get('ident', []))
        self.fault = set(decoded.get('fault', []))
    self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
|
||||
|
||||
def _save(self):
|
||||
encoded = json.dumps({
|
||||
'ident': list(self.ident),
|
||||
'fault': list(self.fault),
|
||||
})
|
||||
self.set_store('active_devices', encoded)
|
||||
|
||||
def _refresh_health(self):
|
||||
h = {}
|
||||
if self.ident:
|
||||
h['DEVICE_IDENT_ON'] = {
|
||||
'severity': 'warning',
|
||||
'summary': '%d devices have ident light turned on' % len(
|
||||
self.ident),
|
||||
'detail': ['{} ident light enabled'.format(d) for d in self.ident]
|
||||
}
|
||||
if self.fault:
|
||||
h['DEVICE_FAULT_ON'] = {
|
||||
'severity': 'warning',
|
||||
'summary': '%d devices have fault light turned on' % len(
|
||||
self.fault),
|
||||
'detail': ['{} fault light enabled'.format(d) for d in self.ident]
|
||||
}
|
||||
self.set_health_checks(h)
|
||||
|
||||
def _get_device_locations(self, dev_id):
    # type: (str) -> List[orchestrator.DeviceLightLoc]
    """Return one ``DeviceLightLoc`` per location entry of every device in
    ``self.get('devices')['devices']`` whose ``devid`` equals ``dev_id``.

    The result is an empty list when no device matches.
    """
    return [
        orchestrator.DeviceLightLoc(**location)
        for device in self.get('devices')['devices']
        if device['devid'] == dev_id
        for location in device['location']
    ]
|
||||
|
||||
@_read_cli(prefix='device ls-lights',
           desc='List currently active device indicator lights')
def _device_ls(self):
    """Report the recorded ident and fault device ids as an indented JSON
    object on stdout."""
    active = {
        'ident': list(self.ident),
        'fault': list(self.fault)
    }
    rendered = json.dumps(active, indent=4)
    return HandleCommandResult(stdout=rendered)
|
||||
|
||||
def light_on(self, fault_ident, devid):
    # type: (str, str) -> HandleCommandResult
    """Switch the ``fault_ident`` light of device ``devid`` on.

    The device is recorded in the matching set, the state is saved and the
    health checks are refreshed before the orchestrator is asked to switch
    the light, then the call waits for that request to complete.

    :param fault_ident: either ``"fault"`` or ``"ident"``
    :param devid: id of the device whose light is switched on
    :return: ``-ENOENT`` with an error message when no location is known
        for ``devid``; otherwise the completion result on stdout
    """
    assert fault_ident in ("fault", "ident")
    locs = self._get_device_locations(devid)
    # _get_device_locations() always returns a list, which is empty for an
    # unknown device. Test for emptiness so that such a device is rejected
    # instead of being recorded as lit and raising a health warning.
    if not locs:
        return HandleCommandResult(stderr='device {} not found'.format(devid),
                                   retval=-errno.ENOENT)

    getattr(self, fault_ident).add(devid)
    self._save()
    self._refresh_health()
    completion = self.blink_device_light(fault_ident, True, locs)
    self._orchestrator_wait([completion])
    return HandleCommandResult(stdout=str(completion.result))
|
||||
|
||||
def light_off(self, fault_ident, devid, force):
    # type: (str, str, bool) -> HandleCommandResult
    """Switch the ``fault_ident`` light of device ``devid`` off.

    The device is removed from the recorded state only after the
    orchestrator request completed. If the request raises, the state is
    still cleaned up when ``force`` is set, and the exception is re-raised
    either way.

    :param fault_ident: either ``"fault"`` or ``"ident"``
    :param devid: id of the device whose light is switched off
    :param force: drop ``devid`` from the recorded state even if switching
        the light off failed
    """
    assert fault_ident in ("fault", "ident")
    locs = self._get_device_locations(devid)
    # NOTE(review): _get_device_locations() returns a list, so this branch
    # looks unreachable. Switching it to an emptiness test would return
    # before the ``force`` clean-up below, so confirm the intended
    # behaviour for unknown devices first.
    if locs is None:
        return HandleCommandResult(stderr='device {} not found'.format(devid),
                                   retval=-errno.ENOENT)

    try:
        completion = self.blink_device_light(fault_ident, False, locs)
        self._orchestrator_wait([completion])

        if devid in getattr(self, fault_ident):
            getattr(self, fault_ident).remove(devid)
            self._save()
            self._refresh_health()
        return HandleCommandResult(stdout=str(completion.result))

    except:
        # There are several reasons the try: block might fail:
        # 1. the device no longer exist
        # 2. the device is no longer known to Ceph
        # 3. the host is not reachable
        if force and devid in getattr(self, fault_ident):
            getattr(self, fault_ident).remove(devid)
            self._save()
            self._refresh_health()
        # The failure is always propagated; ``force`` only affects the
        # recorded state.
        raise
|
||||
|
||||
@_write_cli(prefix='device light',
            cmd_args='name=enable,type=CephChoices,strings=on|off '
                     'name=devid,type=CephString '
                     'name=light_type,type=CephChoices,strings=ident|fault,req=false '
                     'name=force,type=CephBool,req=false',
            desc='Enable or disable the device light. Default type is `ident`\n'
                 'Usage: device light (on|off) <devid> [ident|fault] [--force]')
def _device_light(self, enable, devid, light_type=None, force=False):
    # type: (str, str, Optional[str], bool) -> HandleCommandResult
    """Dispatch ``device light`` to :meth:`light_on` or :meth:`light_off`.

    A missing or empty ``light_type`` falls back to ``'ident'``; ``force``
    is only forwarded when the light is switched off.
    """
    kind = light_type if light_type else 'ident'
    if enable == 'on':
        return self.light_on(kind, devid)
    return self.light_off(kind, devid, force)
|
||||
|
||||
def _select_orchestrator(self):
|
||||
return self.get_module_option("orchestrator")
|
||||
|
||||
|
@ -246,6 +246,12 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator):
|
||||
def remove_osds(self, osd_ids, destroy=False):
|
||||
assert isinstance(osd_ids, list)
|
||||
|
||||
@deferred_write("blink_device_light")
def blink_device_light(self, ident_fault, on, locations):
    """Test implementation: only checks its arguments and returns an empty
    string; ``on`` is not used."""
    assert ident_fault in ("ident", "fault")
    # At least one location has to be passed.
    assert len(locations)
    return ''
|
||||
|
||||
@deferred_write("service_action")
|
||||
def service_action(self, action, service_type, service_name=None, service_id=None):
|
||||
pass
|
||||
|
Loading…
Reference in New Issue
Block a user