mirror of
https://github.com/digitalocean/ceph_exporter
synced 2025-03-31 07:38:38 +00:00
osd: add metrics for down and destroyed OSD
This commit is contained in:
parent
9a39cc64ed
commit
aa5abdc470
@ -6,8 +6,9 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"strings"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -78,6 +79,9 @@ type OSDCollector struct {
|
||||
// OSDUp displays the Up state of the OSD
|
||||
OSDUp *prometheus.GaugeVec
|
||||
|
||||
// OSDDownDesc displays OSDs present in the cluster in "down" state
|
||||
OSDDownDesc *prometheus.Desc
|
||||
|
||||
// TotalBytes displays total bytes in all OSDs
|
||||
TotalBytes prometheus.Gauge
|
||||
|
||||
@ -269,6 +273,12 @@ func NewOSDCollector(conn Conn, cluster string) *OSDCollector {
|
||||
},
|
||||
[]string{"osd"},
|
||||
),
|
||||
OSDDownDesc: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_osd_down", cephNamespace),
|
||||
"No. of OSDs down in the cluster",
|
||||
[]string{"osd", "status"},
|
||||
labels,
|
||||
),
|
||||
ScrubbingStateDesc: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_osd_scrub_state", cephNamespace),
|
||||
"State of OSDs involved in a scrub",
|
||||
@ -340,7 +350,22 @@ type cephOSDDump struct {
|
||||
} `json:"osds"`
|
||||
}
|
||||
|
||||
func (o *OSDCollector) collect() error {
|
||||
// cephOSDTreeDown is the decoding target for the JSON reply of the
// "osd tree" monitor command when it is filtered by state (see
// cephOSDTreeCommand). Only the fields read by collectOSDTreeDown are
// declared; every other key in the reply is ignored by encoding/json.
//
// The element types of Nodes and Stray are kept field-for-field identical
// (including tags) so that the two slices share one type and can be
// appended to each other.
type cephOSDTreeDown struct {
	// Nodes holds the entries of the reply's "nodes" array. The array can
	// contain non-OSD entries (the Type field distinguishes them).
	Nodes []struct {
		ID     int64  `json:"id"`
		Name   string `json:"name"`
		Type   string `json:"type"`
		Status string `json:"status"`
	} `json:"nodes"`

	// Stray holds the entries of the reply's "stray" array.
	Stray []struct {
		ID     int64  `json:"id"`
		Name   string `json:"name"`
		Type   string `json:"type"`
		Status string `json:"status"`
	} `json:"stray"`
}
|
||||
|
||||
func (o *OSDCollector) collectOSDDF() error {
|
||||
cmd := o.cephOSDDFCommand()
|
||||
|
||||
buf, _, err := o.conn.MonCommand(cmd)
|
||||
@ -493,6 +518,34 @@ func (o *OSDCollector) collectOSDPerf() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *OSDCollector) collectOSDTreeDown(ch chan<- prometheus.Metric) error {
|
||||
osdDownCmd := o.cephOSDTreeCommand("down")
|
||||
buff, _, err := o.conn.MonCommand(osdDownCmd)
|
||||
if err != nil {
|
||||
log.Println("[ERROR] Unable to collect data from ceph osd tree down", err)
|
||||
return err
|
||||
}
|
||||
|
||||
osdDown := &cephOSDTreeDown{}
|
||||
if err := json.Unmarshal(buff, osdDown); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
downItems := append(osdDown.Nodes, osdDown.Stray...)
|
||||
|
||||
for _, downItem := range downItems {
|
||||
if downItem.Type != "osd" {
|
||||
continue
|
||||
}
|
||||
|
||||
osdName := downItem.Name
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(o.OSDDownDesc, prometheus.GaugeValue, 1, osdName, downItem.Status)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *OSDCollector) collectOSDDump() error {
|
||||
osdDumpCmd := o.cephOSDDump()
|
||||
buff, _, err := o.conn.MonCommand(osdDumpCmd)
|
||||
@ -541,7 +594,6 @@ func (o *OSDCollector) collectOSDScrubState(ch chan<- prometheus.Metric) error {
|
||||
|
||||
stats := cephPGDumpBriefResponse{}
|
||||
if err := json.Unmarshal(buf, &stats); err != nil {
|
||||
log.Println("Unmarshal:", string(buf[:100]))
|
||||
return err
|
||||
}
|
||||
|
||||
@ -610,6 +662,18 @@ func (o *OSDCollector) cephOSDPerfCommand() []byte {
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (o *OSDCollector) cephOSDTreeCommand(states ...string) []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "osd tree",
|
||||
"states": states,
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (o *OSDCollector) cephPGDumpCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "pg dump",
|
||||
@ -644,7 +708,11 @@ func (o *OSDCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
log.Println("failed collecting osd dump:", err)
|
||||
}
|
||||
|
||||
if err := o.collect(); err != nil {
|
||||
if err := o.collectOSDDF(); err != nil {
|
||||
log.Println("failed collecting osd metrics:", err)
|
||||
}
|
||||
|
||||
if err := o.collectOSDTreeDown(ch); err != nil {
|
||||
log.Println("failed collecting osd metrics:", err)
|
||||
}
|
||||
|
||||
|
@ -310,6 +310,181 @@ func TestOSDCollector(t *testing.T) {
|
||||
regexp.MustCompile(`ceph_osd_scrub_state{cluster="ceph",osd="osd.23"} 2`),
|
||||
},
|
||||
},
|
||||
{
|
||||
input: `
|
||||
{
|
||||
"nodes": [],
|
||||
"stray": [
|
||||
{
|
||||
"id": 524,
|
||||
"name": "osd.524",
|
||||
"type": "osd",
|
||||
"type_id": 0,
|
||||
"crush_weight": 0.000000,
|
||||
"depth": 0,
|
||||
"exists": 1,
|
||||
"status": "destroyed",
|
||||
"reweight": 0.000000,
|
||||
"primary_affinity": 1.000000
|
||||
}
|
||||
]
|
||||
}`,
|
||||
regexes: []*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_osd_down{cluster="ceph",osd="osd.524",status="destroyed"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
input: `
|
||||
{
|
||||
"nodes": [],
|
||||
"stray": [
|
||||
{
|
||||
"id": 524,
|
||||
"name": "osd.524",
|
||||
"type": "osd",
|
||||
"type_id": 0,
|
||||
"crush_weight": 0.000000,
|
||||
"depth": 0,
|
||||
"exists": 1,
|
||||
"status": "down",
|
||||
"reweight": 0.000000,
|
||||
"primary_affinity": 1.000000
|
||||
}
|
||||
]
|
||||
}`,
|
||||
regexes: []*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_osd_down{cluster="ceph",osd="osd.524",status="down"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
input: `
|
||||
{
|
||||
"nodes": [
|
||||
{
|
||||
"id": -18,
|
||||
"name": "data",
|
||||
"type": "root",
|
||||
"type_id": 10,
|
||||
"children": [
|
||||
-20
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": -20,
|
||||
"name": "R1-data",
|
||||
"type": "rack",
|
||||
"type_id": 3,
|
||||
"pool_weights": {},
|
||||
"children": [
|
||||
-8
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": -8,
|
||||
"name": "test-data03-object01",
|
||||
"type": "host",
|
||||
"type_id": 1,
|
||||
"pool_weights": {},
|
||||
"children": [
|
||||
97
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 524,
|
||||
"device_class": "hdd",
|
||||
"name": "osd.524",
|
||||
"type": "osd",
|
||||
"type_id": 0,
|
||||
"crush_weight": 7.265991,
|
||||
"depth": 3,
|
||||
"pool_weights": {},
|
||||
"exists": 1,
|
||||
"status": "destroyed",
|
||||
"reweight": 0.000000,
|
||||
"primary_affinity": 1.000000
|
||||
}
|
||||
],
|
||||
"stray": []
|
||||
}`,
|
||||
regexes: []*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_osd_down{cluster="ceph",osd="osd.524",status="destroyed"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
input: `
|
||||
{
|
||||
"nodes": [
|
||||
{
|
||||
"id": -18,
|
||||
"name": "data",
|
||||
"type": "root",
|
||||
"type_id": 10,
|
||||
"children": [
|
||||
-20
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": -20,
|
||||
"name": "R1-data",
|
||||
"type": "rack",
|
||||
"type_id": 3,
|
||||
"pool_weights": {},
|
||||
"children": [
|
||||
-8
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": -8,
|
||||
"name": "test-data03-object01",
|
||||
"type": "host",
|
||||
"type_id": 1,
|
||||
"pool_weights": {},
|
||||
"children": [
|
||||
97
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 524,
|
||||
"device_class": "hdd",
|
||||
"name": "osd.524",
|
||||
"type": "osd",
|
||||
"type_id": 0,
|
||||
"crush_weight": 7.265991,
|
||||
"depth": 3,
|
||||
"pool_weights": {},
|
||||
"exists": 1,
|
||||
"status": "destroyed",
|
||||
"reweight": 0.000000,
|
||||
"primary_affinity": 1.000000
|
||||
}
|
||||
],
|
||||
"stray": [
|
||||
{
|
||||
"id": 525,
|
||||
"name": "osd.525",
|
||||
"type": "osd",
|
||||
"type_id": 0,
|
||||
"crush_weight": 0.000000,
|
||||
"depth": 0,
|
||||
"exists": 1,
|
||||
"status": "down",
|
||||
"reweight": 0.000000,
|
||||
"primary_affinity": 1.000000
|
||||
}
|
||||
]
|
||||
}`,
|
||||
regexes: []*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_osd_down{cluster="ceph",osd="osd.524",status="destroyed"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_down{cluster="ceph",osd="osd.525",status="down"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
input: `
|
||||
{
|
||||
"nodes": []}}
|
||||
}`,
|
||||
regexes: []*regexp.Regexp{},
|
||||
},
|
||||
} {
|
||||
func() {
|
||||
collector := NewOSDCollector(NewNoopConn(tt.input), "ceph")
|
||||
|
Loading…
Reference in New Issue
Block a user