Bug fixes: guard the legacy gauge lookup against unknown OSD map flags; stop zero-filling unset flags in the osd_map_flags const metric

This commit is contained in:
Daniel R 2022-10-07 14:53:46 -04:00
parent 1a741d7606
commit b9af3ab29f
2 changed files with 7 additions and 20 deletions

View File

@ -511,7 +511,7 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
},
),
OSDMapFlags: prometheus.NewDesc(fmt.Sprintf("%s_osd_map_flags", cephNamespace), "A metric for all OSD flags", []string{"flag"}, labels),
OSDMapFlags: prometheus.NewDesc(fmt.Sprintf("%s_osd_map_flags", cephNamespace), "A metric for all OSDMap flags", []string{"flag"}, labels),
OSDsDown: prometheus.NewDesc(fmt.Sprintf("%s_osds_down", cephNamespace), "Count of OSDs that are in DOWN state", nil, labels),
OSDsUp: prometheus.NewDesc(fmt.Sprintf("%s_osds_up", cephNamespace), "Count of OSDs that are in UP state", nil, labels),
OSDsIn: prometheus.NewDesc(fmt.Sprintf("%s_osds_in", cephNamespace), "Count of OSDs that are in IN state and available to serve requests", nil, labels),
@ -810,7 +810,6 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
}
// This stores OSD map flags that were found, so the rest can be set to 0
matchedOsdMapFlags := make(map[string]bool)
for k, check := range stats.Health.Checks {
if k == "MON_DOWN" {
matched := monsDownRegex.FindStringSubmatch(check.Summary.Message)
@ -863,10 +862,10 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
for _, f := range flags {
// Update the global metric for this specific flag
ch <- prometheus.MustNewConstMetric(c.OSDMapFlags, prometheus.GaugeValue, float64(1), f)
// Update the legacy gauges, based on the map
(*c.OSDFlagToGaugeMap[f]).Set(1)
// Mark the flag as having been set
matchedOsdMapFlags[f] = true
// Update the legacy gauges, based on the map, if valid
if _, exists := c.OSDFlagToGaugeMap[f]; exists {
(*c.OSDFlagToGaugeMap[f]).Set(1)
}
}
}
}
@ -877,13 +876,6 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
}
}
// Zero-fill the OSD Map ConstMetrics (the ones that haven't already been set to 1)
for flagKey := range c.OSDFlagToGaugeMap {
if matchedOsdMapFlags[flagKey] == false {
ch <- prometheus.MustNewConstMetric(c.OSDMapFlags, prometheus.GaugeValue, float64(0), flagKey)
}
}
var (
degradedPGs float64
activePGs float64

View File

@ -509,26 +509,21 @@ $ sudo ceph -s
"OSDMAP_FLAGS": {
"severity": "HEALTH_WARN",
"summary": {
"message": "pauserd,pausewr,noout,noin,norecover,noscrub,notieragent flag(s) set; mon 482f68d873d2 is low on available space"
"message": "pauserd,pausewr,noout,noin,norecover,noscrub,notieragent,newhypotheticalcephflag flag(s) set; mon 482f68d873d2 is low on available space"
}
}
}
}
}`,
reMatch: []*regexp.Regexp{
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="full"} 0`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pauserd"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pausewr"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noup"} 0`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nodown"} 0`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noin"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noout"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nobackfill"} 0`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norecover"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norebalance"} 0`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noscrub"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nodeep_scrub"} 0`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="notieragent"} 1`),
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="newhypotheticalcephflag"} 1`),
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
},
},