mirror of
https://github.com/digitalocean/ceph_exporter
synced 2025-02-08 23:47:05 +00:00
Merge pull request #204 from digitalocean/repair-counter
collectors/health: add repair state checking
This commit is contained in:
commit
e8ea7d7e66
@ -158,6 +158,9 @@ type ClusterHealthCollector struct {
|
||||
// SnaptrimWaitPGs depicts no. of PGs that are currently waiting to snaptrim
|
||||
SnaptrimWaitPGs prometheus.Gauge
|
||||
|
||||
// RepairingPGs depicts no. of PGs that are currently repairing
|
||||
RepairingPGs prometheus.Gauge
|
||||
|
||||
// SlowOps depicts no. of total slow ops in the cluster
|
||||
SlowOps prometheus.Gauge
|
||||
|
||||
@ -507,6 +510,14 @@ func NewClusterHealthCollector(conn Conn, cluster string, logger *logrus.Logger)
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
RepairingPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "repairing_pgs",
|
||||
Help: "No. of PGs in the cluster with repair state",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
// with Nautilus, SLOW_OPS has replaced both REQUEST_SLOW and REQUEST_STUCK
|
||||
// therefore slow_requests is deprecated, but for backwards compatibility
|
||||
// the metric name will be kept the same for the time being
|
||||
@ -916,6 +927,7 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
|
||||
c.InconsistentPGs,
|
||||
c.SnaptrimPGs,
|
||||
c.SnaptrimWaitPGs,
|
||||
c.RepairingPGs,
|
||||
c.SlowOps,
|
||||
c.DegradedObjectsCount,
|
||||
c.MisplacedObjectsCount,
|
||||
@ -1252,6 +1264,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||
inconsistentPGs float64
|
||||
snaptrimPGs float64
|
||||
snaptrimWaitPGs float64
|
||||
repairingPGs float64
|
||||
|
||||
pgStateCounterMap = map[string]*float64{
|
||||
"degraded": &degradedPGs,
|
||||
@ -1273,6 +1286,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||
"inconsistent": &inconsistentPGs,
|
||||
"snaptrim": &snaptrimPGs,
|
||||
"snaptrim_wait": &snaptrimWaitPGs,
|
||||
"repair": &repairingPGs,
|
||||
}
|
||||
pgStateGaugeMap = map[string]prometheus.Gauge{
|
||||
"degraded": c.DegradedPGs,
|
||||
@ -1294,6 +1308,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||
"inconsistent": c.InconsistentPGs,
|
||||
"snaptrim": c.SnaptrimPGs,
|
||||
"snaptrim_wait": c.SnaptrimWaitPGs,
|
||||
"repair": c.RepairingPGs,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -340,6 +340,10 @@ $ sudo ceph -s
|
||||
"state_name": "active+clean+inconsistent",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"state_name": "active+clean+repair",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"state_name": "active+clean+snaptrim",
|
||||
"count": 15
|
||||
@ -362,6 +366,7 @@ $ sudo ceph -s
|
||||
regexp.MustCompile(`cluster_objects{cluster="ceph"} 13156`),
|
||||
regexp.MustCompile(`snaptrim_pgs{cluster="ceph"} 15`),
|
||||
regexp.MustCompile(`snaptrim_wait_pgs{cluster="ceph"} 25`),
|
||||
regexp.MustCompile(`repair{cluster="ceph"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user