mirror of
https://github.com/ceph/ceph
synced 2025-01-01 08:32:24 +00:00
mgr/devicehealth: style/whitespace cleanup
We don't usually do gratuitous whitespace commits, but for the Python code it's worthwhile as it allows smart editors to show us real problems without them getting lost in the style noise. Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
parent
d5e52b1f14
commit
e337da052f
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
Device health monitoring
|
Device health monitoring
|
||||||
"""
|
"""
|
||||||
@ -17,7 +16,7 @@ TIME_FORMAT = '%Y%m%d-%H%M%S'
|
|||||||
DEFAULTS = {
|
DEFAULTS = {
|
||||||
'enable_monitoring': str(True),
|
'enable_monitoring': str(True),
|
||||||
'scrape_frequency': str(86400),
|
'scrape_frequency': str(86400),
|
||||||
'retention_period': str(86400*14),
|
'retention_period': str(86400 * 14),
|
||||||
'pool_name': 'device_health_metrics',
|
'pool_name': 'device_health_metrics',
|
||||||
'mark_out_threshold': str(86400*14),
|
'mark_out_threshold': str(86400*14),
|
||||||
'warn_threshold': str(86400*14*2),
|
'warn_threshold': str(86400*14*2),
|
||||||
@ -33,15 +32,16 @@ HEALTH_MESSAGES = {
|
|||||||
DEVICE_HEALTH_TOOMANY: 'Too many daemons are expected to fail soon',
|
DEVICE_HEALTH_TOOMANY: 'Too many daemons are expected to fail soon',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class Module(MgrModule):
|
class Module(MgrModule):
|
||||||
OPTIONS = [
|
OPTIONS = [
|
||||||
{ 'name': 'enable_monitoring' },
|
{'name': 'enable_monitoring'},
|
||||||
{ 'name': 'scrape_frequency' },
|
{'name': 'scrape_frequency'},
|
||||||
{ 'name': 'pool_name' },
|
{'name': 'pool_name'},
|
||||||
{ 'name': 'retention_period' },
|
{'name': 'retention_period'},
|
||||||
{ 'name': 'mark_out_threshold' },
|
{'name': 'mark_out_threshold'},
|
||||||
{ 'name': 'warn_threshold' },
|
{'name': 'warn_threshold'},
|
||||||
{ 'name': 'self_heal' },
|
{'name': 'self_heal'},
|
||||||
]
|
]
|
||||||
|
|
||||||
COMMANDS = [
|
COMMANDS = [
|
||||||
@ -54,16 +54,20 @@ class Module(MgrModule):
|
|||||||
{
|
{
|
||||||
"cmd": "device scrape-daemon-health-metrics "
|
"cmd": "device scrape-daemon-health-metrics "
|
||||||
"name=who,type=CephString",
|
"name=who,type=CephString",
|
||||||
"desc": "Scrape and store device health metrics for a given daemon",
|
"desc": "Scrape and store device health metrics "
|
||||||
|
"for a given daemon",
|
||||||
"perm": "r"
|
"perm": "r"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cmd": "device scrape-health-metrics name=devid,type=CephString,req=False",
|
"cmd": "device scrape-health-metrics "
|
||||||
|
"name=devid,type=CephString,req=False",
|
||||||
"desc": "Scrape and store health metrics",
|
"desc": "Scrape and store health metrics",
|
||||||
"perm": "r"
|
"perm": "r"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cmd": "device show-health-metrics name=devid,type=CephString name=sample,type=CephString,req=False",
|
"cmd": "device show-health-metrics "
|
||||||
|
"name=devid,type=CephString "
|
||||||
|
"name=sample,type=CephString,req=False",
|
||||||
"desc": "Show stored device metrics for the device",
|
"desc": "Show stored device metrics for the device",
|
||||||
"perm": "r"
|
"perm": "r"
|
||||||
},
|
},
|
||||||
@ -85,13 +89,13 @@ class Module(MgrModule):
|
|||||||
self.run = True
|
self.run = True
|
||||||
self.event = Event()
|
self.event = Event()
|
||||||
|
|
||||||
def handle_command(self, inbuf, cmd):
|
def handle_command(self, _, cmd):
|
||||||
self.log.error("handle_command")
|
self.log.error("handle_command")
|
||||||
|
|
||||||
if cmd['prefix'] == 'device query-daemon-health-metrics':
|
if cmd['prefix'] == 'device query-daemon-health-metrics':
|
||||||
who = cmd.get('who', '')
|
who = cmd.get('who', '')
|
||||||
if who[0:4] != 'osd.':
|
if who[0:4] != 'osd.':
|
||||||
return (-errno.EINVAL, '', 'not a valid <osd.NNN> id')
|
return -errno.EINVAL, '', 'not a valid <osd.NNN> id'
|
||||||
osd_id = who[4:]
|
osd_id = who[4:]
|
||||||
result = CommandResult('')
|
result = CommandResult('')
|
||||||
self.send_command(result, 'osd', osd_id, json.dumps({
|
self.send_command(result, 'osd', osd_id, json.dumps({
|
||||||
@ -99,17 +103,17 @@ class Module(MgrModule):
|
|||||||
'format': 'json',
|
'format': 'json',
|
||||||
}), '')
|
}), '')
|
||||||
r, outb, outs = result.wait()
|
r, outb, outs = result.wait()
|
||||||
return (r, outb, outs)
|
return r, outb, outs
|
||||||
elif cmd['prefix'] == 'device scrape-daemon-health-metrics':
|
elif cmd['prefix'] == 'device scrape-daemon-health-metrics':
|
||||||
who = cmd.get('who', '')
|
who = cmd.get('who', '')
|
||||||
if who[0:4] != 'osd.':
|
if who[0:4] != 'osd.':
|
||||||
return (-errno.EINVAL, '', 'not a valid <osd.NNN> id')
|
return -errno.EINVAL, '', 'not a valid <osd.NNN> id'
|
||||||
id = int(who[4:])
|
osd_id = int(who[4:])
|
||||||
return self.scrape_osd(id)
|
return self.scrape_osd(osd_id)
|
||||||
elif cmd['prefix'] == 'device scrape-health-metrics':
|
elif cmd['prefix'] == 'device scrape-health-metrics':
|
||||||
if 'devid' in cmd:
|
if 'devid' in cmd:
|
||||||
return self.scrape_device(cmd['devid'])
|
return self.scrape_device(cmd['devid'])
|
||||||
return self.scrape_all();
|
return self.scrape_all()
|
||||||
elif cmd['prefix'] == 'device show-health-metrics':
|
elif cmd['prefix'] == 'device show-health-metrics':
|
||||||
return self.show_device_metrics(cmd['devid'], cmd.get('sample'))
|
return self.show_device_metrics(cmd['devid'], cmd.get('sample'))
|
||||||
elif cmd['prefix'] == 'device check-health':
|
elif cmd['prefix'] == 'device check-health':
|
||||||
@ -136,7 +140,8 @@ class Module(MgrModule):
|
|||||||
assert before != after
|
assert before != after
|
||||||
|
|
||||||
def refresh_config(self):
|
def refresh_config(self):
|
||||||
self.enable_monitoring = self.get_config('enable_monitoring', '') is not '' or 'false'
|
self.enable_monitoring = self.get_config('enable_monitoring',
|
||||||
|
'') is not '' or 'false'
|
||||||
for opt, value in iteritems(DEFAULTS):
|
for opt, value in iteritems(DEFAULTS):
|
||||||
setattr(self, opt, self.get_config(opt) or value)
|
setattr(self, opt, self.get_config(opt) or value)
|
||||||
|
|
||||||
@ -201,7 +206,7 @@ class Module(MgrModule):
|
|||||||
assert r == 0
|
assert r == 0
|
||||||
|
|
||||||
ioctx = self.rados.open_ioctx(self.pool_name)
|
ioctx = self.rados.open_ioctx(self.pool_name)
|
||||||
return (ioctx)
|
return ioctx
|
||||||
|
|
||||||
def scrape_osd(self, osd_id):
|
def scrape_osd(self, osd_id):
|
||||||
ioctx = self.open_connection()
|
ioctx = self.open_connection()
|
||||||
@ -237,12 +242,13 @@ class Module(MgrModule):
|
|||||||
def scrape_device(self, devid):
|
def scrape_device(self, devid):
|
||||||
r = self.get("device " + devid)
|
r = self.get("device " + devid)
|
||||||
if not r or 'device' not in r.keys():
|
if not r or 'device' not in r.keys():
|
||||||
return (-errno.ENOENT, '', 'device ' + devid + ' not found')
|
return -errno.ENOENT, '', 'device ' + devid + ' not found'
|
||||||
daemons = r['device'].get('daemons', [])
|
daemons = r['device'].get('daemons', [])
|
||||||
osds = [int(r[4:]) for r in daemons if r.startswith('osd.')]
|
osds = [int(r[4:]) for r in daemons if r.startswith('osd.')]
|
||||||
if not osds:
|
if not osds:
|
||||||
return (-errno.EAGAIN, '',
|
return (-errno.EAGAIN, '',
|
||||||
'device ' + devid + ' not claimed by any active OSD daemons')
|
'device ' + devid + ' not claimed by any active '
|
||||||
|
'OSD daemons')
|
||||||
osd_id = osds[0]
|
osd_id = osds[0]
|
||||||
ioctx = self.open_connection()
|
ioctx = self.open_connection()
|
||||||
raw_smart_data = self.do_scrape_osd(osd_id, ioctx, devid=devid)
|
raw_smart_data = self.do_scrape_osd(osd_id, ioctx, devid=devid)
|
||||||
@ -279,7 +285,7 @@ class Module(MgrModule):
|
|||||||
erase = []
|
erase = []
|
||||||
try:
|
try:
|
||||||
with rados.ReadOpCtx() as op:
|
with rados.ReadOpCtx() as op:
|
||||||
iter, ret = ioctx.get_omap_keys(op, "", 500) # fixme
|
iter, ret = ioctx.get_omap_keys(op, "", 500) # fixme
|
||||||
assert ret == 0
|
assert ret == 0
|
||||||
ioctx.operate_read_op(op, devid)
|
ioctx.operate_read_op(op, devid)
|
||||||
for key, _ in list(iter):
|
for key, _ in list(iter):
|
||||||
@ -301,16 +307,16 @@ class Module(MgrModule):
|
|||||||
# verify device exists
|
# verify device exists
|
||||||
r = self.get("device " + devid)
|
r = self.get("device " + devid)
|
||||||
if not r or 'device' not in r.keys():
|
if not r or 'device' not in r.keys():
|
||||||
return (-errno.ENOENT, '', 'device ' + devid + ' not found')
|
return -errno.ENOENT, '', 'device ' + devid + ' not found'
|
||||||
# fetch metrics
|
# fetch metrics
|
||||||
ioctx = self.open_connection()
|
ioctx = self.open_connection()
|
||||||
res = {}
|
res = {}
|
||||||
with rados.ReadOpCtx() as op:
|
with rados.ReadOpCtx() as op:
|
||||||
iter, ret = ioctx.get_omap_vals(op, "", sample or '', 500) # fixme
|
omap_iter, ret = ioctx.get_omap_vals(op, "", sample or '', 500) # fixme
|
||||||
assert ret == 0
|
assert ret == 0
|
||||||
try:
|
try:
|
||||||
ioctx.operate_read_op(op, devid)
|
ioctx.operate_read_op(op, devid)
|
||||||
for key, value in list(iter):
|
for key, value in list(omap_iter):
|
||||||
if sample and key != sample:
|
if sample and key != sample:
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
@ -466,7 +472,6 @@ class Module(MgrModule):
|
|||||||
if r != 0:
|
if r != 0:
|
||||||
self.log.warn('Could not set osd.%s primary-affinity, r: [%s], outs: [%s]' % (osd_id, r, outb, outs))
|
self.log.warn('Could not set osd.%s primary-affinity, r: [%s], outs: [%s]' % (osd_id, r, outb, outs))
|
||||||
|
|
||||||
|
|
||||||
def extract_smart_features(self, raw):
|
def extract_smart_features(self, raw):
|
||||||
# FIXME: extract and normalize raw smartctl --json output and
|
# FIXME: extract and normalize raw smartctl --json output and
|
||||||
# generate a dict of the fields we care about.
|
# generate a dict of the fields we care about.
|
||||||
|
Loading…
Reference in New Issue
Block a user