Merge pull request #108 from digitalocean/add-recovery-backfill-stats
luminous: add recovery/backfill stats
This commit is contained in:
commit
3efbebdb4f
|
@ -106,6 +106,30 @@ type ClusterHealthCollector struct {
|
|||
// Deep scrubbing reads the data and uses checksums to ensure data integrity.
|
||||
DeepScrubbingPGs prometheus.Gauge
|
||||
|
||||
// RecoveringPGs depicts no. of PGs that are in recovering state.
|
||||
// The PGs in this state have been dequeued from recovery_wait queue and are
|
||||
// actively undergoing recovery.
|
||||
RecoveringPGs prometheus.Gauge
|
||||
|
||||
// RecoveryWaitPGs depicts no. of PGs that are in recovery_wait state.
|
||||
// The PGs in this state are still in queue to start recovery on them.
|
||||
RecoveryWaitPGs prometheus.Gauge
|
||||
|
||||
// BackfillingPGs depicts no. of PGs that are in backfilling state.
|
||||
// The PGs in this state have been dequeued from backfill_wait queue and are
|
||||
// actively undergoing backfill.
|
||||
BackfillingPGs prometheus.Gauge
|
||||
|
||||
// BackfillWaitPGs depicts no. of PGs that are in backfill_wait state.
|
||||
// The PGs in this state are still in queue to start backfill on them.
|
||||
BackfillWaitPGs prometheus.Gauge
|
||||
|
||||
// ForcedRecoveryPGs depicts no. of PGs that are undergoing forced recovery.
|
||||
ForcedRecoveryPGs prometheus.Gauge
|
||||
|
||||
// ForcedBackfillPGs depicts no. of PGs that are undergoing forced backfill.
|
||||
ForcedBackfillPGs prometheus.Gauge
|
||||
|
||||
// SlowRequests depicts no. of total slow requests in the cluster
|
||||
// This stat exists only for backwards compatibility.
|
||||
SlowRequests prometheus.Gauge
|
||||
|
@ -237,6 +261,54 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto
|
|||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
RecoveringPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "recovering_pgs",
|
||||
Help: "No. of recovering PGs in the cluster",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
RecoveryWaitPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "recovery_wait_pgs",
|
||||
Help: "No. of PGs in the cluster with recovery_wait state",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
BackfillingPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "backfilling_pgs",
|
||||
Help: "No. of backfilling PGs in the cluster",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
BackfillWaitPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "backfill_wait_pgs",
|
||||
Help: "No. of PGs in the cluster with backfill_wait state",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
ForcedRecoveryPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "forced_recovery_pgs",
|
||||
Help: "No. of PGs in the cluster with forced_recovery state",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
ForcedBackfillPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "forced_backfill_pgs",
|
||||
Help: "No. of PGs in the cluster with forced_backfill state",
|
||||
ConstLabels: labels,
|
||||
},
|
||||
),
|
||||
SlowRequests: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
|
@ -486,6 +558,12 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
|
|||
c.PeeringPGs,
|
||||
c.ScrubbingPGs,
|
||||
c.DeepScrubbingPGs,
|
||||
c.RecoveringPGs,
|
||||
c.RecoveryWaitPGs,
|
||||
c.BackfillingPGs,
|
||||
c.BackfillWaitPGs,
|
||||
c.ForcedRecoveryPGs,
|
||||
c.ForcedBackfillPGs,
|
||||
c.SlowRequests,
|
||||
c.DegradedObjectsCount,
|
||||
c.MisplacedObjectsCount,
|
||||
|
@ -756,24 +834,36 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
|
||||
var (
|
||||
degradedPGs float64
|
||||
activePGs float64
|
||||
uncleanPGs float64
|
||||
undersizedPGs float64
|
||||
peeringPGs float64
|
||||
stalePGs float64
|
||||
scrubbingPGs float64
|
||||
deepScrubbingPGs float64
|
||||
degradedPGs float64
|
||||
activePGs float64
|
||||
uncleanPGs float64
|
||||
undersizedPGs float64
|
||||
peeringPGs float64
|
||||
stalePGs float64
|
||||
scrubbingPGs float64
|
||||
deepScrubbingPGs float64
|
||||
recoveringPGs float64
|
||||
recoveryWaitPGs float64
|
||||
backfillingPGs float64
|
||||
backfillWaitPGs float64
|
||||
forcedRecoveryPGs float64
|
||||
forcedBackfillPGs float64
|
||||
|
||||
pgStateMap = map[string]*float64{
|
||||
"degraded": &degradedPGs,
|
||||
"active": &activePGs,
|
||||
"unclean": &uncleanPGs,
|
||||
"undersized": &undersizedPGs,
|
||||
"peering": &peeringPGs,
|
||||
"stale": &stalePGs,
|
||||
"scrubbing": &scrubbingPGs,
|
||||
"scrubbing+deep": &deepScrubbingPGs,
|
||||
"degraded": &degradedPGs,
|
||||
"active": &activePGs,
|
||||
"unclean": &uncleanPGs,
|
||||
"undersized": &undersizedPGs,
|
||||
"peering": &peeringPGs,
|
||||
"stale": &stalePGs,
|
||||
"scrubbing": &scrubbingPGs,
|
||||
"scrubbing+deep": &deepScrubbingPGs,
|
||||
"recovering": &recoveringPGs,
|
||||
"recovery_wait": &recoveryWaitPGs,
|
||||
"backfilling": &backfillingPGs,
|
||||
"backfill_wait": &backfillWaitPGs,
|
||||
"forced_recovery": &forcedRecoveryPGs,
|
||||
"forced_backfill": &forcedBackfillPGs,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -809,6 +899,24 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
if *pgStateMap["scrubbing+deep"] > 0 {
|
||||
c.DeepScrubbingPGs.Set(*pgStateMap["scrubbing+deep"])
|
||||
}
|
||||
if *pgStateMap["recovering"] > 0 {
|
||||
c.RecoveringPGs.Set(*pgStateMap["recovering"])
|
||||
}
|
||||
if *pgStateMap["recovery_wait"] > 0 {
|
||||
c.RecoveryWaitPGs.Set(*pgStateMap["recovery_wait"])
|
||||
}
|
||||
if *pgStateMap["backfilling"] > 0 {
|
||||
c.BackfillingPGs.Set(*pgStateMap["backfilling"])
|
||||
}
|
||||
if *pgStateMap["backfill_wait"] > 0 {
|
||||
c.BackfillWaitPGs.Set(*pgStateMap["backfill_wait"])
|
||||
}
|
||||
if *pgStateMap["forced_recovery"] > 0 {
|
||||
c.ForcedRecoveryPGs.Set(*pgStateMap["forced_recovery"])
|
||||
}
|
||||
if *pgStateMap["forced_backfill"] > 0 {
|
||||
c.ForcedBackfillPGs.Set(*pgStateMap["forced_backfill"])
|
||||
}
|
||||
|
||||
c.ClientReadBytesPerSec.Set(stats.PGMap.ReadBytePerSec)
|
||||
c.ClientWriteBytesPerSec.Set(stats.PGMap.WriteBytePerSec)
|
||||
|
|
|
@ -615,7 +615,35 @@ $ sudo ceph -s
|
|||
{
|
||||
"count": 10,
|
||||
"state_name": "scrubbing+deep"
|
||||
}
|
||||
},
|
||||
{
|
||||
"state_name": "remapped+recovering",
|
||||
"count": 5
|
||||
},
|
||||
{
|
||||
"state_name": "active+remapped+backfilling",
|
||||
"count": 2
|
||||
},
|
||||
{
|
||||
"state_name": "recovery_wait+inconsistent",
|
||||
"count": 2
|
||||
},
|
||||
{
|
||||
"state_name": "recovery_wait+remapped",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"state_name": "active+undersized+remapped+backfill_wait",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"state_name": "active+undersized+remapped+backfill_wait+forced_backfill",
|
||||
"count": 10
|
||||
},
|
||||
{
|
||||
"state_name": "active+forced_recovery+undersized",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"num_pgs": 9208,
|
||||
"num_pools": 29,
|
||||
|
@ -633,14 +661,20 @@ $ sudo ceph -s
|
|||
}
|
||||
}`,
|
||||
regexes: []*regexp.Regexp{
|
||||
regexp.MustCompile(`active_pgs{cluster="ceph"} 30`),
|
||||
regexp.MustCompile(`active_pgs{cluster="ceph"} 44`),
|
||||
regexp.MustCompile(`degraded_pgs{cluster="ceph"} 40`),
|
||||
regexp.MustCompile(`unclean_pgs{cluster="ceph"} 30`),
|
||||
regexp.MustCompile(`undersized_pgs{cluster="ceph"} 40`),
|
||||
regexp.MustCompile(`undersized_pgs{cluster="ceph"} 52`),
|
||||
regexp.MustCompile(`stale_pgs{cluster="ceph"} 30`),
|
||||
regexp.MustCompile(`peering_pgs{cluster="ceph"} 10`),
|
||||
regexp.MustCompile(`scrubbing_pgs{cluster="ceph"} 20`),
|
||||
regexp.MustCompile(`deep_scrubbing_pgs{cluster="ceph"} 10`),
|
||||
regexp.MustCompile(`recovering_pgs{cluster="ceph"} 5`),
|
||||
regexp.MustCompile(`recovery_wait_pgs{cluster="ceph"} 3`),
|
||||
regexp.MustCompile(`backfilling_pgs{cluster="ceph"} 2`),
|
||||
regexp.MustCompile(`backfill_wait_pgs{cluster="ceph"} 11`),
|
||||
regexp.MustCompile(`forced_recovery_pgs{cluster="ceph"} 1`),
|
||||
regexp.MustCompile(`forced_backfill_pgs{cluster="ceph"} 10`),
|
||||
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 65536`),
|
||||
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 25`),
|
||||
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 140`),
|
||||
|
|
Loading…
Reference in New Issue