mgr/zabbix: Send max, min and avg PGs of OSDs to Zabbix

We already send the max, min and avg fill ratio of OSDs but
knowing the OSD with the highest amount of PGs is also useful.

This allows admins to create a trigger should it happen that there
is an OSD with too many PGs.

This could happen if a lot of OSDs fail and PGs start to move, filling
up one or more OSDs with many PGs.

As PGs consume CPU and memory, admins usually like to watch out for
these situations.

Signed-off-by: Wido den Hollander <wido@42on.com>
This commit is contained in:
Wido den Hollander 2018-03-26 13:27:27 +02:00
parent 4198558162
commit 582935f222
No known key found for this signature in database
GPG Key ID: 019B582DDB3ECA42
2 changed files with 134 additions and 0 deletions

View File

@ -172,6 +172,7 @@ class Module(MgrModule):
data['num_osd_in'] = num_in
osd_fill = list()
osd_pgs = list()
osd_apply_latency_ns = list()
osd_commit_latency_ns = list()
@ -180,6 +181,7 @@ class Module(MgrModule):
if osd['kb'] == 0:
continue
osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100)
osd_pgs.append(osd['num_pgs'])
osd_apply_latency_ns.append(osd['perf_stat']['apply_latency_ns'])
osd_commit_latency_ns.append(osd['perf_stat']['commit_latency_ns'])
@ -187,6 +189,9 @@ class Module(MgrModule):
data['osd_max_fill'] = max(osd_fill)
data['osd_min_fill'] = min(osd_fill)
data['osd_avg_fill'] = avg(osd_fill)
data['osd_max_pgs'] = max(osd_pgs)
data['osd_min_pgs'] = min(osd_pgs)
data['osd_avg_pgs'] = avg(osd_pgs)
except ValueError:
pass

View File

@ -367,6 +367,135 @@
<valuemap/>
<logtimefmt/>
</item>
<item>
<name>Ceph OSD max PGs</name>
<type>2</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>ceph.osd_max_pgs</key>
<delay>0</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description>Maximum amount of PGs on OSDs</description>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Ceph</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
</item>
<item>
<name>Ceph OSD min PGs</name>
<type>2</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>ceph.osd_min_pgs</key>
<delay>0</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description>Minimum amount of PGs on OSDs</description>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Ceph</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
</item>
<item>
<name>Ceph OSD avg PGs</name>
<type>2</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>ceph.osd_avg_pgs</key>
<delay>0</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description>Average amount of PGs on OSDs</description>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Ceph</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
</item>
<item>
<name>Ceph backfill full ratio</name>
<type>2</type>