introduce constmetrics for osdmap flags
This commit is contained in:
parent
c3a3d581aa
commit
1a741d7606
|
@ -203,6 +203,11 @@ type ClusterHealthCollector struct {
|
|||
OSDMapFlagNoDeepScrub prometheus.Gauge
|
||||
OSDMapFlagNoTierAgent prometheus.Gauge
|
||||
|
||||
// OSDMapFlags exposes the OSD map flags as a single ConstMetric, with each flag reported as a label value.
|
||||
OSDMapFlags *prometheus.Desc
|
||||
// OSDFlagToGaugeMap maps each OSD map flag name to its legacy per-flag gauge.
|
||||
OSDFlagToGaugeMap map[string]*prometheus.Gauge
|
||||
|
||||
// OSDsDown shows the number of OSDs that are in the DOWN state.
|
||||
OSDsDown *prometheus.Desc
|
||||
|
||||
|
@ -505,6 +510,8 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
|
|||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
|
||||
OSDMapFlags: prometheus.NewDesc(fmt.Sprintf("%s_osd_map_flags", cephNamespace), "A metric for all OSD flags", []string{"flag"}, labels),
|
||||
OSDsDown: prometheus.NewDesc(fmt.Sprintf("%s_osds_down", cephNamespace), "Count of OSDs that are in DOWN state", nil, labels),
|
||||
OSDsUp: prometheus.NewDesc(fmt.Sprintf("%s_osds_up", cephNamespace), "Count of OSDs that are in UP state", nil, labels),
|
||||
OSDsIn: prometheus.NewDesc(fmt.Sprintf("%s_osds_in", cephNamespace), "Count of OSDs that are in IN state and available to serve requests", nil, labels),
|
||||
|
@ -526,6 +533,23 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
|
|||
RbdMirrorUp: prometheus.NewDesc(fmt.Sprintf("%s_rbd_mirror_up", cephNamespace), "Alive rbd-mirror daemons", []string{"name"}, labels),
|
||||
}
|
||||
|
||||
// This is here to support backwards compatibility with gauges, but also exists as a general list of possible flags
|
||||
collector.OSDFlagToGaugeMap = map[string]*prometheus.Gauge{
|
||||
"full": &collector.OSDMapFlagFull,
|
||||
"pauserd": &collector.OSDMapFlagPauseRd,
|
||||
"pausewr": &collector.OSDMapFlagPauseWr,
|
||||
"noup": &collector.OSDMapFlagNoUp,
|
||||
"nodown": &collector.OSDMapFlagNoDown,
|
||||
"noin": &collector.OSDMapFlagNoIn,
|
||||
"noout": &collector.OSDMapFlagNoOut,
|
||||
"nobackfill": &collector.OSDMapFlagNoBackfill,
|
||||
"norecover": &collector.OSDMapFlagNoRecover,
|
||||
"norebalance": &collector.OSDMapFlagNoRebalance,
|
||||
"noscrub": &collector.OSDMapFlagNoScrub,
|
||||
"nodeep_scrub": &collector.OSDMapFlagNoDeepScrub,
|
||||
"notieragent": &collector.OSDMapFlagNoTierAgent,
|
||||
}
|
||||
|
||||
if exporter.Version.IsAtLeast(Pacific) {
|
||||
// pacific adds the DAEMON_OLD_VERSION health check
|
||||
// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
|
||||
|
@ -785,6 +809,8 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
}
|
||||
|
||||
// This stores OSD map flags that were found, so the rest can be set to 0
|
||||
matchedOsdMapFlags := make(map[string]bool)
|
||||
for k, check := range stats.Health.Checks {
|
||||
if k == "MON_DOWN" {
|
||||
matched := monsDownRegex.FindStringSubmatch(check.Summary.Message)
|
||||
|
@ -835,37 +861,14 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
if len(matched) > 0 {
|
||||
flags := strings.Split(matched[1], ",")
|
||||
for _, f := range flags {
|
||||
switch f {
|
||||
case "full":
|
||||
c.OSDMapFlagFull.Set(1)
|
||||
case "pauserd":
|
||||
c.OSDMapFlagPauseRd.Set(1)
|
||||
case "pausewr":
|
||||
c.OSDMapFlagPauseWr.Set(1)
|
||||
case "noup":
|
||||
c.OSDMapFlagNoUp.Set(1)
|
||||
case "nodown":
|
||||
c.OSDMapFlagNoDown.Set(1)
|
||||
case "noin":
|
||||
c.OSDMapFlagNoIn.Set(1)
|
||||
case "noout":
|
||||
c.OSDMapFlagNoOut.Set(1)
|
||||
case "nobackfill":
|
||||
c.OSDMapFlagNoBackfill.Set(1)
|
||||
case "norecover":
|
||||
c.OSDMapFlagNoRecover.Set(1)
|
||||
case "norebalance":
|
||||
c.OSDMapFlagNoRebalance.Set(1)
|
||||
case "noscrub":
|
||||
c.OSDMapFlagNoScrub.Set(1)
|
||||
case "nodeep_scrub":
|
||||
c.OSDMapFlagNoDeepScrub.Set(1)
|
||||
case "notieragent":
|
||||
c.OSDMapFlagNoTierAgent.Set(1)
|
||||
}
|
||||
// Update the global metric for this specific flag
|
||||
ch <- prometheus.MustNewConstMetric(c.OSDMapFlags, prometheus.GaugeValue, float64(1), f)
|
||||
// Update the legacy gauges, based on the map
|
||||
(*c.OSDFlagToGaugeMap[f]).Set(1)
|
||||
// Mark the flag as having been set
|
||||
matchedOsdMapFlags[f] = true
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if !mapEmpty {
|
||||
if val, present := c.healthChecksMap[k]; present {
|
||||
|
@ -874,6 +877,13 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
}
|
||||
|
||||
// Zero-fill the OSD Map ConstMetrics (the ones that haven't already been set to 1)
|
||||
for flagKey := range c.OSDFlagToGaugeMap {
|
||||
if matchedOsdMapFlags[flagKey] == false {
|
||||
ch <- prometheus.MustNewConstMetric(c.OSDMapFlags, prometheus.GaugeValue, float64(0), flagKey)
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
degradedPGs float64
|
||||
activePGs float64
|
||||
|
|
|
@ -500,6 +500,38 @@ $ sudo ceph -s
|
|||
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "many flags set new osdmap constmetrics filled",
|
||||
input: `
|
||||
{
|
||||
"health": {
|
||||
"checks": {
|
||||
"OSDMAP_FLAGS": {
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": {
|
||||
"message": "pauserd,pausewr,noout,noin,norecover,noscrub,notieragent flag(s) set; mon 482f68d873d2 is low on available space"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="full"} 0`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pauserd"} 1`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pausewr"} 1`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noup"} 0`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nodown"} 0`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noin"} 1`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noout"} 1`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nobackfill"} 0`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norecover"} 1`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norebalance"} 0`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noscrub"} 1`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nodeep_scrub"} 0`),
|
||||
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="notieragent"} 1`),
|
||||
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "lots of PG data",
|
||||
input: `
|
||||
|
|
Loading…
Reference in New Issue