Merge pull request #110 from digitalocean/luminous_pg_down

health: add stats for down PGs
This commit is contained in:
Vaibhav Bhembre 2018-11-22 11:25:46 -05:00 committed by GitHub
commit f91b1241dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 0 deletions

View File

@ -130,6 +130,9 @@ type ClusterHealthCollector struct {
// ForcedBackfillPGs depicts no. of PGs that are undergoing forced backfill.
ForcedBackfillPGs prometheus.Gauge
// DownPGs depicts no. of PGs that are currently down and not able to serve traffic.
DownPGs prometheus.Gauge
// SlowRequests depicts no. of total slow requests in the cluster.
// This stat exists only for backwards compatibility.
SlowRequests prometheus.Gauge
@ -309,6 +312,14 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto
ConstLabels: labels,
},
),
DownPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "down_pgs",
Help: "No. of PGs in the cluster in down state",
ConstLabels: labels,
},
),
SlowRequests: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
@ -564,6 +575,7 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
c.BackfillWaitPGs,
c.ForcedRecoveryPGs,
c.ForcedBackfillPGs,
c.DownPGs,
c.SlowRequests,
c.DegradedObjectsCount,
c.MisplacedObjectsCount,
@ -848,6 +860,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
backfillWaitPGs float64
forcedRecoveryPGs float64
forcedBackfillPGs float64
downPGs float64
pgStateMap = map[string]*float64{
"degraded": &degradedPGs,
@ -864,6 +877,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
"backfill_wait": &backfillWaitPGs,
"forced_recovery": &forcedRecoveryPGs,
"forced_backfill": &forcedBackfillPGs,
"down": &downPGs,
}
)
@ -917,6 +931,9 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
if *pgStateMap["forced_backfill"] > 0 {
c.ForcedBackfillPGs.Set(*pgStateMap["forced_backfill"])
}
if *pgStateMap["down"] > 0 {
c.DownPGs.Set(*pgStateMap["down"])
}
c.ClientReadBytesPerSec.Set(stats.PGMap.ReadBytePerSec)
c.ClientWriteBytesPerSec.Set(stats.PGMap.WriteBytePerSec)

View File

@ -640,6 +640,14 @@ $ sudo ceph -s
"state_name": "active+undersized+remapped+backfill_wait+forced_backfill",
"count": 10
},
{
"state_name": "down",
"count": 6
},
{
"state_name": "down+remapped",
"count": 31
},
{
"state_name": "active+forced_recovery+undersized",
"count": 1
@ -675,6 +683,7 @@ $ sudo ceph -s
regexp.MustCompile(`backfill_wait_pgs{cluster="ceph"} 11`),
regexp.MustCompile(`forced_recovery_pgs{cluster="ceph"} 1`),
regexp.MustCompile(`forced_backfill_pgs{cluster="ceph"} 10`),
regexp.MustCompile(`down_pgs{cluster="ceph"} 37`),
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 65536`),
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 25`),
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 140`),