Merge pull request #156 from yuezhu/nautilus

Add gauge for incomplete PGs
This commit is contained in:
Yue Zhu 2020-07-07 22:23:53 -04:00 committed by GitHub
commit e7c6973903
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 0 deletions

View File

@@ -143,6 +143,9 @@ type ClusterHealthCollector struct {
// DownPGs depicts no. of PGs that are currently down and not able to serve traffic.
DownPGs prometheus.Gauge
// IncompletePGs depicts no. of PGs that are currently incomplete and not able to serve traffic.
IncompletePGs prometheus.Gauge
// SlowOps depicts no. of total slow ops in the cluster
SlowOps prometheus.Gauge
@@ -438,6 +441,14 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto
ConstLabels: labels,
},
),
IncompletePGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "incomplete_pgs",
Help: "No. of PGs in the cluster in incomplete state",
ConstLabels: labels,
},
),
// with Nautilus, SLOW_OPS has replaced both REQUEST_SLOW and REQUEST_STUCK
// therefore slow_requests is deprecated, but for backwards compatibility
// the metric name will be kept the same for the time being
@@ -812,6 +823,7 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
c.ForcedRecoveryPGs,
c.ForcedBackfillPGs,
c.DownPGs,
c.IncompletePGs,
c.SlowOps,
c.DegradedObjectsCount,
c.MisplacedObjectsCount,
@@ -1142,6 +1154,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
forcedRecoveryPGs float64
forcedBackfillPGs float64
downPGs float64
incompletePGs float64
pgStateCounterMap = map[string]*float64{
"degraded": &degradedPGs,
@@ -1159,6 +1172,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
"forced_recovery": &forcedRecoveryPGs,
"forced_backfill": &forcedBackfillPGs,
"down": &downPGs,
"incomplete": &incompletePGs,
}
pgStateGaugeMap = map[string]prometheus.Gauge{
"degraded": c.DegradedPGs,
@@ -1176,6 +1190,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
"forced_recovery": c.ForcedRecoveryPGs,
"forced_backfill": c.ForcedBackfillPGs,
"down": c.DownPGs,
"incomplete": c.IncompletePGs,
}
)

View File

@@ -581,6 +581,10 @@ $ sudo ceph -s
{
"state_name": "active+forced_recovery+undersized",
"count": 1
},
{
"state_name": "remapped+incomplete",
"count": 2
}
],
"num_pgs": 9208,
@@ -614,6 +618,7 @@ $ sudo ceph -s
regexp.MustCompile(`forced_recovery_pgs{cluster="ceph"} 1`),
regexp.MustCompile(`forced_backfill_pgs{cluster="ceph"} 10`),
regexp.MustCompile(`down_pgs{cluster="ceph"} 37`),
regexp.MustCompile(`incomplete_pgs{cluster="ceph"} 2`),
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 65536`),
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 25`),
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 140`),
@@ -639,6 +644,7 @@ $ sudo ceph -s
regexp.MustCompile(`pg_state{cluster="ceph",state="forced_recovery"} 1`),
regexp.MustCompile(`pg_state{cluster="ceph",state="forced_backfill"} 10`),
regexp.MustCompile(`pg_state{cluster="ceph",state="down"} 37`),
regexp.MustCompile(`pg_state{cluster="ceph",state="incomplete"} 2`),
},
},
} {