diff --git a/collectors/health.go b/collectors/health.go index 0f389e8..cd43f51 100644 --- a/collectors/health.go +++ b/collectors/health.go @@ -97,6 +97,9 @@ type ClusterHealthCollector struct { // Deep scrubbing reads the data and uses checksums to ensure data integrity. DeepScrubbingPGs prometheus.Gauge + // SlowRequests depicts the total no. of slow requests in the cluster. + SlowRequests prometheus.Gauge + // DegradedObjectsCount gives the no. of RADOS objects are constitute the degraded PGs. // This includes object replicas in its count. DegradedObjectsCount prometheus.Gauge @@ -212,6 +215,14 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto ConstLabels: labels, }, ), + SlowRequests: prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: cephNamespace, + Name: "slow_requests", + Help: "No. of slow requests", + ConstLabels: labels, + }, + ), DegradedPGs: prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: cephNamespace, @@ -437,6 +448,7 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric { c.StuckStalePGs, c.ScrubbingPGs, c.DeepScrubbingPGs, + c.SlowRequests, c.DegradedObjectsCount, c.MisplacedObjectsCount, c.OSDsDown, @@ -521,6 +533,7 @@ func (c *ClusterHealthCollector) collect() error { stuckUndersizedRegex = regexp.MustCompile(`([\d]+) pgs stuck undersized`) staleRegex = regexp.MustCompile(`([\d]+) pgs stale`) stuckStaleRegex = regexp.MustCompile(`([\d]+) pgs stuck stale`) + slowRequestRegex = regexp.MustCompile(`([\d]+) requests are blocked`) degradedObjectsRegex = regexp.MustCompile(`recovery ([\d]+)/([\d]+) objects degraded`) misplacedObjectsRegex = regexp.MustCompile(`recovery ([\d]+)/([\d]+) objects misplaced`) ) @@ -598,6 +611,15 @@ func (c *ClusterHealthCollector) collect() error { c.StuckStalePGs.Set(float64(v)) } + matched = slowRequestRegex.FindStringSubmatch(s.Summary) + if len(matched) == 2 { + v, err := strconv.Atoi(matched[1]) + if err != nil { + return err + } + 
c.SlowRequests.Set(float64(v)) + } + matched = degradedObjectsRegex.FindStringSubmatch(s.Summary) if len(matched) == 3 { v, err := strconv.Atoi(matched[1]) diff --git a/collectors/health_test.go b/collectors/health_test.go index 4d28729..915f339 100644 --- a/collectors/health_test.go +++ b/collectors/health_test.go @@ -372,6 +372,22 @@ $ sudo ceph -s regexp.MustCompile(`deep_scrubbing_pgs{cluster="ceph"} 5`), }, }, + { + input: ` +{ + "health": { + "summary": [ + { + "severity": "HEALTH_WARN", + "summary": "6 requests are blocked > 32 sec" + } + ] + } +}`, + regexes: []*regexp.Regexp{ + regexp.MustCompile(`slow_requests{cluster="ceph"} 6`), + }, + }, } { func() { collector := NewClusterHealthCollector(NewNoopConn(tt.input), "ceph")