health: add scrubbing and deep-scrubbing pgs

Parse the PG states reported by ceph status and sum up the number of PGs
whose state name contains "scrubbing" or "scrubbing+deep".

Fixes: https://github.com/digitalocean/ceph_exporter/issues/22
This commit is contained in:
Craig Chi 2017-04-22 22:08:12 +08:00
parent 4407414149
commit 71563b1293
2 changed files with 83 additions and 0 deletions

View File

@ -89,6 +89,14 @@ type ClusterHealthCollector struct {
// in that state.
StuckStalePGs prometheus.Gauge
// ScrubbingPGs depicts no. of PGs that are in scrubbing state.
// Light scrubbing checks the object size and attributes.
ScrubbingPGs prometheus.Gauge
// DeepScrubbingPGs depicts no. of PGs that are in scrubbing+deep state.
// Deep scrubbing reads the data and uses checksums to ensure data integrity.
DeepScrubbingPGs prometheus.Gauge
// DegradedObjectsCount gives the no. of RADOS objects that constitute the degraded PGs.
// This includes object replicas in its count.
DegradedObjectsCount prometheus.Gauge
@ -188,6 +196,22 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto
ConstLabels: labels,
},
),
ScrubbingPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "scrubbing_pgs",
Help: "No. of scrubbing PGs in the cluster",
ConstLabels: labels,
},
),
DeepScrubbingPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "deep_scrubbing_pgs",
Help: "No. of deep scrubbing PGs in the cluster",
ConstLabels: labels,
},
),
DegradedPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
@ -411,6 +435,8 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
c.StuckUndersizedPGs,
c.StalePGs,
c.StuckStalePGs,
c.ScrubbingPGs,
c.DeepScrubbingPGs,
c.DegradedObjectsCount,
c.MisplacedObjectsCount,
c.OSDsDown,
@ -449,6 +475,10 @@ type cephHealthStats struct {
} `json:"osdmap"`
} `json:"osdmap"`
PGMap struct {
PGsByState []struct {
StateName string `json:"state_name"`
Count json.Number `json:"count"`
} `json:"pgs_by_state"`
NumPGs json.Number `json:"num_pgs"`
} `json:"pgmap"`
}
@ -615,6 +645,28 @@ func (c *ClusterHealthCollector) collect() error {
}
c.RemappedPGs.Set(remappedPGs)
scrubbingPGs := float64(0)
deepScrubbingPGs := float64(0)
for _, pg := range stats.PGMap.PGsByState {
if strings.Contains(pg.StateName, "scrubbing") {
if strings.Contains(pg.StateName, "deep") {
deepScrubbingCount, err := pg.Count.Float64()
if err != nil {
return err
}
deepScrubbingPGs += deepScrubbingCount
} else {
scrubbingCount, err := pg.Count.Float64()
if err != nil {
return err
}
scrubbingPGs += scrubbingCount
}
}
}
c.ScrubbingPGs.Set(scrubbingPGs)
c.DeepScrubbingPGs.Set(deepScrubbingPGs)
totalPGs, err := stats.PGMap.NumPGs.Float64()
if err != nil {
return err

View File

@ -341,6 +341,37 @@ $ sudo ceph -s
regexp.MustCompile(`total_pgs{cluster="ceph"} 52000`),
},
},
{
input: `
{
"osdmap": {
"osdmap": {
"num_osds": 0,
"num_up_osds": 0,
"num_in_osds": 0,
"num_remapped_pgs": 0
}
},
"pgmap": {
"pgs_by_state": [
{
"state_name": "active+clean+scrubbing",
"count": 2
},
{
"state_name": "active+clean+scrubbing+deep",
"count": 5
}
],
"num_pgs": 52000
},
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
}`,
regexes: []*regexp.Regexp{
regexp.MustCompile(`scrubbing_pgs{cluster="ceph"} 2`),
regexp.MustCompile(`deep_scrubbing_pgs{cluster="ceph"} 5`),
},
},
} {
func() {
collector := NewClusterHealthCollector(NewNoopConn(tt.input), "ceph")