Merge pull request #110 from digitalocean/luminous_pg_down
health: add stats for down PGs
This commit is contained in:
commit
f91b1241dc
|
@ -130,6 +130,9 @@ type ClusterHealthCollector struct {
|
|||
// ForcedBackfillPGs depicts no. of PGs that are undergoing forced backfill.
|
||||
ForcedBackfillPGs prometheus.Gauge
|
||||
|
||||
// DownPGs depicts no. of PGs that are currently down and not able to serve traffic.
|
||||
DownPGs prometheus.Gauge
|
||||
|
||||
// SlowRequests depicts no. of total slow requests in the cluster
|
||||
// This stat exists only for backwards compatibility.
|
||||
SlowRequests prometheus.Gauge
|
||||
|
@ -309,6 +312,14 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto
|
|||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
DownPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "down_pgs",
|
||||
Help: "No. of PGs in the cluster in down state",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
SlowRequests: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
|
@ -564,6 +575,7 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
|
|||
c.BackfillWaitPGs,
|
||||
c.ForcedRecoveryPGs,
|
||||
c.ForcedBackfillPGs,
|
||||
c.DownPGs,
|
||||
c.SlowRequests,
|
||||
c.DegradedObjectsCount,
|
||||
c.MisplacedObjectsCount,
|
||||
|
@ -848,6 +860,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
backfillWaitPGs float64
|
||||
forcedRecoveryPGs float64
|
||||
forcedBackfillPGs float64
|
||||
downPGs float64
|
||||
|
||||
pgStateMap = map[string]*float64{
|
||||
"degraded": &degradedPGs,
|
||||
|
@ -864,6 +877,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
"backfill_wait": &backfillWaitPGs,
|
||||
"forced_recovery": &forcedRecoveryPGs,
|
||||
"forced_backfill": &forcedBackfillPGs,
|
||||
"down": &downPGs,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -917,6 +931,9 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
if *pgStateMap["forced_backfill"] > 0 {
|
||||
c.ForcedBackfillPGs.Set(*pgStateMap["forced_backfill"])
|
||||
}
|
||||
if *pgStateMap["down"] > 0 {
|
||||
c.DownPGs.Set(*pgStateMap["down"])
|
||||
}
|
||||
|
||||
c.ClientReadBytesPerSec.Set(stats.PGMap.ReadBytePerSec)
|
||||
c.ClientWriteBytesPerSec.Set(stats.PGMap.WriteBytePerSec)
|
||||
|
|
|
@ -640,6 +640,14 @@ $ sudo ceph -s
|
|||
"state_name": "active+undersized+remapped+backfill_wait+forced_backfill",
|
||||
"count": 10
|
||||
},
|
||||
{
|
||||
"state_name": "down",
|
||||
"count": 6
|
||||
},
|
||||
{
|
||||
"state_name": "down+remapped",
|
||||
"count": 31
|
||||
},
|
||||
{
|
||||
"state_name": "active+forced_recovery+undersized",
|
||||
"count": 1
|
||||
|
@ -675,6 +683,7 @@ $ sudo ceph -s
|
|||
regexp.MustCompile(`backfill_wait_pgs{cluster="ceph"} 11`),
|
||||
regexp.MustCompile(`forced_recovery_pgs{cluster="ceph"} 1`),
|
||||
regexp.MustCompile(`forced_backfill_pgs{cluster="ceph"} 10`),
|
||||
regexp.MustCompile(`down_pgs{cluster="ceph"} 37`),
|
||||
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 65536`),
|
||||
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 25`),
|
||||
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 140`),
|
||||
|
|
Loading…
Reference in New Issue