health: add total pg count

Vaibhav Bhembre 2016-12-01 21:06:22 +05:30
parent 2649185010
commit f477b37494
2 changed files with 42 additions and 2 deletions
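
With this change the exporter exposes one new gauge on scrape. Assuming the cephNamespace constant resolves to "ceph", as with the exporter's other metrics, the new series would render like this (value taken from the test fixture below):

# HELP ceph_total_pgs Total no. of PGs in the cluster
# TYPE ceph_total_pgs gauge
ceph_total_pgs 52000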


@@ -53,6 +53,9 @@ type ClusterHealthCollector struct {
// HealthStatus shows the overall health status of a given cluster.
HealthStatus prometheus.Gauge
// TotalPGs shows the total no. of PGs the cluster consists of.
TotalPGs prometheus.Gauge
// DegradedPGs shows the no. of PGs that have some of the replicas
// missing.
DegradedPGs prometheus.Gauge
@@ -87,12 +90,14 @@ type ClusterHealthCollector struct {
StuckStalePGs prometheus.Gauge
// DegradedObjectsCount gives the no. of RADOS objects that constitute the degraded PGs.
// This includes object replicas in its count.
DegradedObjectsCount prometheus.Gauge
// MisplacedObjectsCount gives the no. of RADOS objects that constitute the misplaced PGs.
// Misplaced PGs usually represent the PGs that are not in the storage locations that
// they should be in. This is different from degraded PGs, which means a PG has fewer copies
// than it should.
// This includes object replicas in its count.
MisplacedObjectsCount prometheus.Gauge
// OSDsDown shows the no. of OSDs that are in the DOWN state.
@@ -171,6 +176,13 @@ func NewClusterHealthCollector(conn Conn) *ClusterHealthCollector {
Help: "Health status of Cluster, can vary only between 3 states (err:2, warn:1, ok:0)",
},
),
TotalPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "total_pgs",
Help: "Total no. of PGs in the cluster",
},
),
DegradedPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
@@ -231,14 +243,14 @@ func NewClusterHealthCollector(conn Conn) *ClusterHealthCollector {
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "degraded_objects",
Help: "No. of degraded objects across all PGs",
Help: "No. of degraded objects across all PGs, includes replicas",
},
),
MisplacedObjectsCount: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "misplaced_objects",
Help: "No. of misplaced objects across all PGs",
Help: "No. of misplaced objects across all PGs, includes replicas",
},
),
OSDsDown: prometheus.NewGauge(
@@ -359,6 +371,7 @@ func NewClusterHealthCollector(conn Conn) *ClusterHealthCollector {
func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
return []prometheus.Metric{
c.HealthStatus,
c.TotalPGs,
c.DegradedPGs,
c.StuckDegradedPGs,
c.UncleanPGs,
@@ -404,6 +417,9 @@ type cephHealthStats struct {
NumRemappedPGs json.Number `json:"num_remapped_pgs"`
} `json:"osdmap"`
} `json:"osdmap"`
PGMap struct {
NumPGs json.Number `json:"num_pgs"`
} `json:"pgmap"`
}
func (c *ClusterHealthCollector) collect() error {
@@ -568,6 +584,12 @@ func (c *ClusterHealthCollector) collect() error {
}
c.RemappedPGs.Set(remappedPGs)
totalPGs, err := stats.PGMap.NumPGs.Float64()
if err != nil {
return err
}
c.TotalPGs.Set(totalPGs)
return nil
}
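
The hunk above completes the path from the cluster status JSON to the new gauge: num_pgs arrives as a json.Number and is converted with Float64() before being set. A minimal, self-contained sketch of that path, with an inline stand-in for the ceph -s --format json output (the package layout, fixture, and variable names here are illustrative, not the exporter's actual code):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Trimmed-down stand-in for the cluster status JSON.
	input := `{"pgmap": {"num_pgs": 52000}}`

	var stats struct {
		PGMap struct {
			NumPGs json.Number `json:"num_pgs"`
		} `json:"pgmap"`
	}
	if err := json.Unmarshal([]byte(input), &stats); err != nil {
		panic(err)
	}

	// json.Number defers parsing until Float64() is called, matching
	// how collect() converts the other counters in this file.
	totalPGs, err := stats.PGMap.NumPGs.Float64()
	if err != nil {
		panic(err)
	}

	totalPGsGauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "ceph", // the exporter uses its cephNamespace constant here
		Name:      "total_pgs",
		Help:      "Total no. of PGs in the cluster",
	})
	totalPGsGauge.Set(totalPGs)

	fmt.Println(totalPGs) // 52000
}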


@@ -323,6 +323,24 @@ $ sudo ceph -s
regexp.MustCompile(`cache_promote_io_ops 55`),
},
},
{
input: `
{
"osdmap": {
"osdmap": {
"num_osds": 0,
"num_up_osds": 0,
"num_in_osds": 0,
"num_remapped_pgs": 0
}
},
"pgmap": { "num_pgs": 52000 },
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
}`,
regexes: []*regexp.Regexp{
regexp.MustCompile(`total_pgs 52000`),
},
},
} {
func() {
collector := NewClusterHealthCollector(NewNoopConn(tt.input))
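
From here the loop would collect the metrics and assert each expected regexp against a scrape of the output. A self-contained sketch of one way such a check can be written, assuming the collector implements prometheus.Collector; the checkMetrics helper and its use of promhttp are illustrative, not this repository's actual test harness:

package collector_test

import (
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"regexp"
	"testing"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// checkMetrics registers a collector, scrapes it via an in-process HTTP
// server, and fails for every expected pattern missing from the output.
func checkMetrics(t *testing.T, c prometheus.Collector, regexes []*regexp.Regexp) {
	reg := prometheus.NewRegistry()
	reg.MustRegister(c)

	srv := httptest.NewServer(promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	defer srv.Close()

	resp, err := http.Get(srv.URL)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}

	for _, re := range regexes {
		if !re.Match(body) {
			t.Errorf("expected scrape output to match %q", re)
		}
	}
}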