introduce constmetrics for osdmap flags
This commit is contained in:
parent
c3a3d581aa
commit
1a741d7606
|
@ -203,6 +203,11 @@ type ClusterHealthCollector struct {
|
||||||
OSDMapFlagNoDeepScrub prometheus.Gauge
|
OSDMapFlagNoDeepScrub prometheus.Gauge
|
||||||
OSDMapFlagNoTierAgent prometheus.Gauge
|
OSDMapFlagNoTierAgent prometheus.Gauge
|
||||||
|
|
||||||
|
// OSDMapFlags reports all OSD map flags through a single ConstMetric, with each flag exposed as a label value
|
||||||
|
OSDMapFlags *prometheus.Desc
|
||||||
|
// OSDFlagToGaugeMap maps flags to gauges
|
||||||
|
OSDFlagToGaugeMap map[string]*prometheus.Gauge
|
||||||
|
|
||||||
// OSDsDown shows the number of OSDs that are in the DOWN state.
|
// OSDsDown shows the number of OSDs that are in the DOWN state.
|
||||||
OSDsDown *prometheus.Desc
|
OSDsDown *prometheus.Desc
|
||||||
|
|
||||||
|
@ -505,6 +510,8 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
|
||||||
ConstLabels: labels,
|
ConstLabels: labels,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
|
||||||
|
OSDMapFlags: prometheus.NewDesc(fmt.Sprintf("%s_osd_map_flags", cephNamespace), "A metric for all OSD flags", []string{"flag"}, labels),
|
||||||
OSDsDown: prometheus.NewDesc(fmt.Sprintf("%s_osds_down", cephNamespace), "Count of OSDs that are in DOWN state", nil, labels),
|
OSDsDown: prometheus.NewDesc(fmt.Sprintf("%s_osds_down", cephNamespace), "Count of OSDs that are in DOWN state", nil, labels),
|
||||||
OSDsUp: prometheus.NewDesc(fmt.Sprintf("%s_osds_up", cephNamespace), "Count of OSDs that are in UP state", nil, labels),
|
OSDsUp: prometheus.NewDesc(fmt.Sprintf("%s_osds_up", cephNamespace), "Count of OSDs that are in UP state", nil, labels),
|
||||||
OSDsIn: prometheus.NewDesc(fmt.Sprintf("%s_osds_in", cephNamespace), "Count of OSDs that are in IN state and available to serve requests", nil, labels),
|
OSDsIn: prometheus.NewDesc(fmt.Sprintf("%s_osds_in", cephNamespace), "Count of OSDs that are in IN state and available to serve requests", nil, labels),
|
||||||
|
@ -526,6 +533,23 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
|
||||||
RbdMirrorUp: prometheus.NewDesc(fmt.Sprintf("%s_rbd_mirror_up", cephNamespace), "Alive rbd-mirror daemons", []string{"name"}, labels),
|
RbdMirrorUp: prometheus.NewDesc(fmt.Sprintf("%s_rbd_mirror_up", cephNamespace), "Alive rbd-mirror daemons", []string{"name"}, labels),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This map keeps backwards compatibility with the legacy per-flag gauges, and its keys also serve as the general list of possible flags
|
||||||
|
collector.OSDFlagToGaugeMap = map[string]*prometheus.Gauge{
|
||||||
|
"full": &collector.OSDMapFlagFull,
|
||||||
|
"pauserd": &collector.OSDMapFlagPauseRd,
|
||||||
|
"pausewr": &collector.OSDMapFlagPauseWr,
|
||||||
|
"noup": &collector.OSDMapFlagNoUp,
|
||||||
|
"nodown": &collector.OSDMapFlagNoDown,
|
||||||
|
"noin": &collector.OSDMapFlagNoIn,
|
||||||
|
"noout": &collector.OSDMapFlagNoOut,
|
||||||
|
"nobackfill": &collector.OSDMapFlagNoBackfill,
|
||||||
|
"norecover": &collector.OSDMapFlagNoRecover,
|
||||||
|
"norebalance": &collector.OSDMapFlagNoRebalance,
|
||||||
|
"noscrub": &collector.OSDMapFlagNoScrub,
|
||||||
|
"nodeep_scrub": &collector.OSDMapFlagNoDeepScrub,
|
||||||
|
"notieragent": &collector.OSDMapFlagNoTierAgent,
|
||||||
|
}
|
||||||
|
|
||||||
if exporter.Version.IsAtLeast(Pacific) {
|
if exporter.Version.IsAtLeast(Pacific) {
|
||||||
// pacific adds the DAEMON_OLD_VERSION health check
|
// pacific adds the DAEMON_OLD_VERSION health check
|
||||||
// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
|
// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
|
||||||
|
@ -785,6 +809,8 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This stores OSD map flags that were found, so the rest can be set to 0
|
||||||
|
matchedOsdMapFlags := make(map[string]bool)
|
||||||
for k, check := range stats.Health.Checks {
|
for k, check := range stats.Health.Checks {
|
||||||
if k == "MON_DOWN" {
|
if k == "MON_DOWN" {
|
||||||
matched := monsDownRegex.FindStringSubmatch(check.Summary.Message)
|
matched := monsDownRegex.FindStringSubmatch(check.Summary.Message)
|
||||||
|
@ -835,37 +861,14 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||||
if len(matched) > 0 {
|
if len(matched) > 0 {
|
||||||
flags := strings.Split(matched[1], ",")
|
flags := strings.Split(matched[1], ",")
|
||||||
for _, f := range flags {
|
for _, f := range flags {
|
||||||
switch f {
|
// Update the global metric for this specific flag
|
||||||
case "full":
|
ch <- prometheus.MustNewConstMetric(c.OSDMapFlags, prometheus.GaugeValue, float64(1), f)
|
||||||
c.OSDMapFlagFull.Set(1)
|
// Update the legacy gauges, based on the map
|
||||||
case "pauserd":
|
(*c.OSDFlagToGaugeMap[f]).Set(1)
|
||||||
c.OSDMapFlagPauseRd.Set(1)
|
// Mark the flag as having been set
|
||||||
case "pausewr":
|
matchedOsdMapFlags[f] = true
|
||||||
c.OSDMapFlagPauseWr.Set(1)
|
|
||||||
case "noup":
|
|
||||||
c.OSDMapFlagNoUp.Set(1)
|
|
||||||
case "nodown":
|
|
||||||
c.OSDMapFlagNoDown.Set(1)
|
|
||||||
case "noin":
|
|
||||||
c.OSDMapFlagNoIn.Set(1)
|
|
||||||
case "noout":
|
|
||||||
c.OSDMapFlagNoOut.Set(1)
|
|
||||||
case "nobackfill":
|
|
||||||
c.OSDMapFlagNoBackfill.Set(1)
|
|
||||||
case "norecover":
|
|
||||||
c.OSDMapFlagNoRecover.Set(1)
|
|
||||||
case "norebalance":
|
|
||||||
c.OSDMapFlagNoRebalance.Set(1)
|
|
||||||
case "noscrub":
|
|
||||||
c.OSDMapFlagNoScrub.Set(1)
|
|
||||||
case "nodeep_scrub":
|
|
||||||
c.OSDMapFlagNoDeepScrub.Set(1)
|
|
||||||
case "notieragent":
|
|
||||||
c.OSDMapFlagNoTierAgent.Set(1)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if !mapEmpty {
|
if !mapEmpty {
|
||||||
if val, present := c.healthChecksMap[k]; present {
|
if val, present := c.healthChecksMap[k]; present {
|
||||||
|
@ -874,6 +877,13 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Zero-fill the OSD Map ConstMetrics (the ones that haven't already been set to 1)
|
||||||
|
for flagKey := range c.OSDFlagToGaugeMap {
|
||||||
|
if matchedOsdMapFlags[flagKey] == false {
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.OSDMapFlags, prometheus.GaugeValue, float64(0), flagKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
degradedPGs float64
|
degradedPGs float64
|
||||||
activePGs float64
|
activePGs float64
|
||||||
|
|
|
@ -500,6 +500,38 @@ $ sudo ceph -s
|
||||||
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "many flags set new osdmap constmetrics filled",
|
||||||
|
input: `
|
||||||
|
{
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OSDMAP_FLAGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "pauserd,pausewr,noout,noin,norecover,noscrub,notieragent flag(s) set; mon 482f68d873d2 is low on available space"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}`,
|
||||||
|
reMatch: []*regexp.Regexp{
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="full"} 0`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pauserd"} 1`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pausewr"} 1`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noup"} 0`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nodown"} 0`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noin"} 1`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noout"} 1`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nobackfill"} 0`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norecover"} 1`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norebalance"} 0`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noscrub"} 1`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="nodeep_scrub"} 0`),
|
||||||
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="notieragent"} 1`),
|
||||||
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "lots of PG data",
|
name: "lots of PG data",
|
||||||
input: `
|
input: `
|
||||||
|
|
Loading…
Reference in New Issue