mirror of
https://github.com/ceph/ceph
synced 2025-02-24 03:27:10 +00:00
mgr/dashboard: fix: get SMART data from single-daemon device
Return SMART data even when a device is only associated with a single daemon. Fixes: https://tracker.ceph.com/issues/53858 Signed-off-by: Alfonso Martínez <almartin@redhat.com>
This commit is contained in:
parent
b709091d81
commit
6cd3729e27
@ -8,7 +8,6 @@ from mgr_module import CommandResult
|
||||
from mgr_util import get_most_recent_rate, get_time_series_rates
|
||||
|
||||
from .. import mgr
|
||||
from ..exceptions import DashboardException
|
||||
|
||||
try:
|
||||
from typing import Any, Dict, Optional, Union
|
||||
@ -235,11 +234,7 @@ class CephService(object):
|
||||
# type: (dict) -> Dict[str, dict]
|
||||
# Check whether the device is associated with daemons.
|
||||
if 'daemons' in device and device['daemons']:
|
||||
dev_smart_data = None
|
||||
|
||||
# The daemons associated with the device. Note, the list may
|
||||
# contain daemons that are 'down' or 'destroyed'.
|
||||
daemons = device.get('daemons')
|
||||
dev_smart_data: Dict[str, Any] = {}
|
||||
|
||||
# Get a list of all OSD daemons on all hosts that are 'up'
|
||||
# because SMART data can not be retrieved from daemons that
|
||||
@ -250,26 +245,27 @@ class CephService(object):
|
||||
if node.get('status') == 'up'
|
||||
]
|
||||
|
||||
# Finally get the daemons on the host of the given device
|
||||
# that are 'up'. All daemons on the same host can deliver
|
||||
# SMART data, thus it is not relevant for us which daemon
|
||||
# we are using.
|
||||
daemons = list(set(daemons) & set(osd_daemons_up)) # type: ignore
|
||||
|
||||
for daemon in daemons:
|
||||
# All daemons on the same host can deliver SMART data,
|
||||
# thus it is not relevant for us which daemon we are using.
|
||||
# NOTE: the list may contain daemons that are 'down' or 'destroyed'.
|
||||
for daemon in device['daemons']:
|
||||
svc_type, svc_id = daemon.split('.')
|
||||
if 'osd' in svc_type:
|
||||
if daemon not in osd_daemons_up:
|
||||
continue
|
||||
try:
|
||||
dev_smart_data = CephService.send_command(
|
||||
svc_type, 'smart', svc_id, devid=device['devid'])
|
||||
except SendCommandError:
|
||||
except SendCommandError as error:
|
||||
logger.warning(str(error))
|
||||
# Try to retrieve SMART data from another daemon.
|
||||
continue
|
||||
elif 'mon' in svc_type:
|
||||
try:
|
||||
dev_smart_data = CephService.send_command(
|
||||
svc_type, 'device query-daemon-health-metrics', who=daemon)
|
||||
except SendCommandError:
|
||||
except SendCommandError as error:
|
||||
logger.warning(str(error))
|
||||
# Try to retrieve SMART data from another daemon.
|
||||
continue
|
||||
else:
|
||||
@ -280,10 +276,7 @@ class CephService(object):
|
||||
'[SMART] Error retrieving smartctl data for device ID "%s": %s',
|
||||
dev_id, dev_data)
|
||||
break
|
||||
if dev_smart_data is None:
|
||||
raise DashboardException(
|
||||
'Failed to retrieve SMART data for device ID "{}"'.format(
|
||||
device['devid']))
|
||||
|
||||
return dev_smart_data
|
||||
logger.warning('[SMART] No daemons associated with device ID "%s"',
|
||||
device['devid'])
|
||||
|
@ -109,23 +109,61 @@ def test_get_smart_data(caplog, by, args, log):
|
||||
|
||||
|
||||
@mock.patch.object(CephService, 'send_command')
|
||||
def test_get_smart_data_from_appropriate_ceph_command(send_command):
|
||||
def test_get_smart_data_by_device(send_command):
|
||||
# pylint: disable=protected-access
|
||||
send_command.side_effect = [
|
||||
{'nodes': [{'name': 'osd.1', 'status': 'up'}, {'name': 'mon.1', 'status': 'down'}]},
|
||||
{'fake': {'device': {'name': '/dev/sda'}}}
|
||||
]
|
||||
CephService._get_smart_data_by_device({'devid': '1', 'daemons': ['osd.1', 'mon.1']})
|
||||
send_command.assert_has_calls([mock.call('mon', 'osd tree'),
|
||||
mock.call('osd', 'smart', '1', devid='1')])
|
||||
device_id = 'Hitachi_HUA72201_JPW9K0N20D22SE'
|
||||
osd_tree_payload = {'nodes':
|
||||
[
|
||||
{'name': 'osd.1', 'status': 'down'},
|
||||
{'name': 'osd.2', 'status': 'up'},
|
||||
{'name': 'osd.3', 'status': 'up'}
|
||||
]}
|
||||
health_metrics_payload = {device_id: {'ata_apm': {'enabled': False}}}
|
||||
side_effect = [osd_tree_payload, health_metrics_payload]
|
||||
|
||||
send_command.side_effect = [
|
||||
{'nodes': [{'name': 'osd.1', 'status': 'down'}, {'name': 'mon.1', 'status': 'up'}]},
|
||||
{'fake': {'device': {'name': '/dev/sda'}}}
|
||||
]
|
||||
CephService._get_smart_data_by_device({'devid': '1', 'daemons': ['osd.1', 'mon.1']})
|
||||
# Daemons associated: 1 osd down, 2 osd up.
|
||||
send_command.side_effect = side_effect
|
||||
smart_data = CephService._get_smart_data_by_device(
|
||||
{'devid': device_id, 'daemons': ['osd.1', 'osd.2', 'osd.3']})
|
||||
assert smart_data == health_metrics_payload
|
||||
send_command.assert_has_calls([mock.call('mon', 'osd tree'),
|
||||
mock.call('osd', 'smart', '2', devid=device_id)])
|
||||
|
||||
# Daemons associated: 1 osd down.
|
||||
send_command.reset_mock()
|
||||
send_command.side_effect = [osd_tree_payload]
|
||||
smart_data = CephService._get_smart_data_by_device({'devid': device_id, 'daemons': ['osd.1']})
|
||||
assert smart_data == {}
|
||||
send_command.assert_has_calls([mock.call('mon', 'osd tree')])
|
||||
|
||||
# Daemons associated: 1 osd down, 1 mon.
|
||||
send_command.reset_mock()
|
||||
send_command.side_effect = side_effect
|
||||
smart_data = CephService._get_smart_data_by_device(
|
||||
{'devid': device_id, 'daemons': ['osd.1', 'mon.1']})
|
||||
assert smart_data == health_metrics_payload
|
||||
send_command.assert_has_calls([mock.call('mon', 'osd tree'),
|
||||
mock.call('osd', 'smart', '1', devid='1'),
|
||||
mock.call('mon', 'osd tree'),
|
||||
mock.call('mon', 'device query-daemon-health-metrics',
|
||||
who='mon.1')])
|
||||
|
||||
# Daemons associated: 1 mon.
|
||||
send_command.reset_mock()
|
||||
send_command.side_effect = side_effect
|
||||
smart_data = CephService._get_smart_data_by_device({'devid': device_id, 'daemons': ['mon.1']})
|
||||
assert smart_data == health_metrics_payload
|
||||
send_command.assert_has_calls([mock.call('mon', 'osd tree'),
|
||||
mock.call('mon', 'device query-daemon-health-metrics',
|
||||
who='mon.1')])
|
||||
|
||||
# Daemons associated: 1 other (non-osd, non-mon).
|
||||
send_command.reset_mock()
|
||||
send_command.side_effect = [osd_tree_payload]
|
||||
smart_data = CephService._get_smart_data_by_device({'devid': device_id, 'daemons': ['rgw.1']})
|
||||
assert smart_data == {}
|
||||
send_command.assert_has_calls([mock.call('mon', 'osd tree')])
|
||||
|
||||
# Daemons associated: no daemons.
|
||||
send_command.reset_mock()
|
||||
smart_data = CephService._get_smart_data_by_device({'devid': device_id, 'daemons': []})
|
||||
assert smart_data == {}
|
||||
send_command.assert_has_calls([])
|
||||
|
Loading…
Reference in New Issue
Block a user