Osd to OSD.

madzfe 2016-07-29 11:04:55 +08:00
parent 068d1afb80
commit a548fe25b4
3 changed files with 69 additions and 67 deletions


@@ -8,66 +8,68 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
-//OsdCollector sample comment
-type OsdCollector struct {
+// OSDCollector displays statistics about OSD in the ceph cluster.
+// An important aspect of monitoring OSDs is to ensure that when the cluster is up and
+// running that all OSDs that are in the cluster are up and running, too
+type OSDCollector struct {
 	conn Conn
-	//CrushWeight is a persistent setting, and it affects how CRUSH assigns data to OSDs.
-	//It displays the CRUSH weight for the OSD
+	// CrushWeight is a persistent setting, and it affects how CRUSH assigns data to OSDs.
+	// It displays the CRUSH weight for the OSD
 	CrushWeight *prometheus.GaugeVec
-	//Depth displays the OSD's level of hierarchy in the CRUSH map
+	// Depth displays the OSD's level of hierarchy in the CRUSH map
 	Depth *prometheus.GaugeVec
-	//Reweight sets an override weight on the OSD.
-	//It displays value within 0 to 1.
+	// Reweight sets an override weight on the OSD.
+	// It displays value within 0 to 1.
 	Reweight *prometheus.GaugeVec
-	//Bytes displays the total bytes available in the OSD
+	// Bytes displays the total bytes available in the OSD
 	Bytes *prometheus.GaugeVec
-	//UsedBytes displays the total used bytes in the OSD
+	// UsedBytes displays the total used bytes in the OSD
 	UsedBytes *prometheus.GaugeVec
-	//AvailBytes displays the total available bytes in the OSD
+	// AvailBytes displays the total available bytes in the OSD
 	AvailBytes *prometheus.GaugeVec
-	//Utilization displays current utilization of the OSD
+	// Utilization displays current utilization of the OSD
 	Utilization *prometheus.GaugeVec
-	//Pgs displays total no. of placement groups in the OSD.
-	//Available in Ceph Jewel version.
+	// Pgs displays total no. of placement groups in the OSD.
+	// Available in Ceph Jewel version.
 	Pgs *prometheus.GaugeVec
-	//CommitLatency displays in seconds how long it takes for an operation to be applied to disk
+	// CommitLatency displays in seconds how long it takes for an operation to be applied to disk
 	CommitLatency *prometheus.GaugeVec
-	//ApplyLatency displays in seconds how long it takes to get applied to the backing filesystem
+	// ApplyLatency displays in seconds how long it takes to get applied to the backing filesystem
 	ApplyLatency *prometheus.GaugeVec
-	//OsdsIn displays the In state of the OSD
-	OsdIn *prometheus.GaugeVec
-	//OsdsUP displays the Up state of the OSD
-	OsdUp *prometheus.GaugeVec
-	//TotalBytes displays total bytes in all OSDs
+	// OSDIn displays the In state of the OSD
+	OSDIn *prometheus.GaugeVec
+	// OSDUp displays the Up state of the OSD
+	OSDUp *prometheus.GaugeVec
+	// TotalBytes displays total bytes in all OSDs
 	TotalBytes prometheus.Gauge
-	//TotalUsedBytes displays total used bytes in all OSDs
+	// TotalUsedBytes displays total used bytes in all OSDs
 	TotalUsedBytes prometheus.Gauge
-	//TotalAvailBytes displays total available bytes in all OSDs
+	// TotalAvailBytes displays total available bytes in all OSDs
 	TotalAvailBytes prometheus.Gauge
-	//AverageUtil displays average utilization in all OSDs
+	// AverageUtil displays average utilization in all OSDs
 	AverageUtil prometheus.Gauge
 }
-//NewOsdCollector creates an instance of the OsdCollector and instantiates
-// the individual metrics that show information about the osd.
-func NewOsdCollector(conn Conn) *OsdCollector {
-	return &OsdCollector{
+//NewOSDCollector creates an instance of the OSDCollector and instantiates
+// the individual metrics that show information about the OSD.
+func NewOSDCollector(conn Conn) *OSDCollector {
+	return &OSDCollector{
 		conn: conn,
 		CrushWeight: prometheus.NewGaugeVec(
@@ -191,7 +193,7 @@ func NewOsdCollector(conn Conn) *OsdCollector {
 			[]string{"osd"},
 		),
-		OsdIn: prometheus.NewGaugeVec(
+		OSDIn: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: cephNamespace,
 				Name: "osd_in",
@@ -200,7 +202,7 @@ func NewOsdCollector(conn Conn) *OsdCollector {
 			[]string{"osd"},
 		),
-		OsdUp: prometheus.NewGaugeVec(
+		OSDUp: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: cephNamespace,
 				Name: "osd_up",
@@ -211,7 +213,7 @@ func NewOsdCollector(conn Conn) *OsdCollector {
 	}
 }
-func (o *OsdCollector) collectorList() []prometheus.Collector {
+func (o *OSDCollector) collectorList() []prometheus.Collector {
 	return []prometheus.Collector{
 		o.CrushWeight,
 		o.Depth,
@@ -227,13 +229,13 @@ func (o *OsdCollector) collectorList() []prometheus.Collector {
 		o.AverageUtil,
 		o.CommitLatency,
 		o.ApplyLatency,
-		o.OsdIn,
-		o.OsdUp,
+		o.OSDIn,
+		o.OSDUp,
 	}
 }
-type cephOsdDf struct {
-	OsdNodes []struct {
+type cephOSDDF struct {
+	OSDNodes []struct {
 		Name string `json:"name"`
 		CrushWeight json.Number `json:"crush_weight"`
 		Depth json.Number `json:"depth"`
@@ -263,16 +265,16 @@ type cephPerfStat struct {
 	} `json:"osd_perf_infos"`
 }
-type cephOsdDump struct {
-	Osds []struct {
-		Osd json.Number `json:"osd"`
+type cephOSDDump struct {
+	OSDs []struct {
+		OSD json.Number `json:"osd"`
 		Up json.Number `json:"up"`
 		In json.Number `json:"in"`
 	} `json:"osds"`
 }
-func (o *OsdCollector) collect() error {
-	cmd := o.cephOSDDfCommand()
+func (o *OSDCollector) collect() error {
+	cmd := o.cephOSDDFCommand()
 	buf, _, err := o.conn.MonCommand(cmd)
 	if err != nil {
@@ -280,12 +282,12 @@ func (o *OsdCollector) collect() error {
 		return err
 	}
-	osdDf := &cephOsdDf{}
-	if err := json.Unmarshal(buf, osdDf); err != nil {
+	osdDF := &cephOSDDF{}
+	if err := json.Unmarshal(buf, osdDF); err != nil {
 		return err
 	}
-	for _, node := range osdDf.OsdNodes {
+	for _, node := range osdDF.OSDNodes {
 		crushWeight, err := node.CrushWeight.Float64()
 		if err != nil {
@@ -346,28 +348,28 @@ func (o *OsdCollector) collect() error {
 	}
-	totalKB, err := osdDf.Summary.TotalKB.Float64()
+	totalKB, err := osdDF.Summary.TotalKB.Float64()
 	if err != nil {
 		return err
 	}
 	o.TotalBytes.Set(totalKB * 1e3)
-	totalUsedKB, err := osdDf.Summary.TotalUsedKB.Float64()
+	totalUsedKB, err := osdDF.Summary.TotalUsedKB.Float64()
 	if err != nil {
 		return err
 	}
 	o.TotalUsedBytes.Set(totalUsedKB * 1e3)
-	totalAvailKB, err := osdDf.Summary.TotalAvailKB.Float64()
+	totalAvailKB, err := osdDF.Summary.TotalAvailKB.Float64()
 	if err != nil {
 		return err
 	}
 	o.TotalAvailBytes.Set(totalAvailKB * 1e3)
-	averageUtil, err := osdDf.Summary.AverageUtil.Float64()
+	averageUtil, err := osdDF.Summary.AverageUtil.Float64()
 	if err != nil {
 		return err
 	}
@@ -378,7 +380,7 @@ func (o *OsdCollector) collect() error {
 }
-func (o *OsdCollector) collectOsdPerf() error {
+func (o *OSDCollector) collectOSDPerf() error {
 	osdPerfCmd := o.cephOSDPerfCommand()
 	buf, _, err := o.conn.MonCommand(osdPerfCmd)
 	if err != nil {
@@ -414,21 +416,21 @@ func (o *OsdCollector) collectOsdPerf() error {
 	return nil
 }
-func (o *OsdCollector) collectOsdDump() error {
-	osdDumpCmd := o.cephOsdDump()
+func (o *OSDCollector) collectOSDDump() error {
+	osdDumpCmd := o.cephOSDDump()
 	buff, _, err := o.conn.MonCommand(osdDumpCmd)
 	if err != nil {
 		log.Println("[ERROR] Unable to collect data from ceph osd dump", err)
 		return err
 	}
-	osdDump := &cephOsdDump{}
+	osdDump := &cephOSDDump{}
 	if err := json.Unmarshal(buff, osdDump); err != nil {
 		return err
 	}
-	for _, dumpInfo := range osdDump.Osds {
-		osdID, err := dumpInfo.Osd.Int64()
+	for _, dumpInfo := range osdDump.OSDs {
+		osdID, err := dumpInfo.OSD.Int64()
 		if err != nil {
 			return err
 		}
@@ -439,21 +441,21 @@ func (o *OsdCollector) collectOsdDump() error {
 			return err
 		}
-		o.OsdIn.WithLabelValues(osdName).Set(in)
+		o.OSDIn.WithLabelValues(osdName).Set(in)
 		up, err := dumpInfo.Up.Float64()
 		if err != nil {
 			return err
 		}
-		o.OsdUp.WithLabelValues(osdName).Set(up)
+		o.OSDUp.WithLabelValues(osdName).Set(up)
 	}
 	return nil
 }
-func (o *OsdCollector) cephOsdDump() []byte {
+func (o *OSDCollector) cephOSDDump() []byte {
 	cmd, err := json.Marshal(map[string]interface{}{
 		"prefix": "osd dump",
 		"format": "json",
@@ -464,7 +466,7 @@ func (o *OsdCollector) cephOsdDump() []byte {
 	return cmd
 }
-func (o *OsdCollector) cephOSDDfCommand() []byte {
+func (o *OSDCollector) cephOSDDFCommand() []byte {
 	cmd, err := json.Marshal(map[string]interface{}{
 		"prefix": "osd df",
 		"format": "json",
@@ -475,7 +477,7 @@ func (o *OsdCollector) cephOSDDfCommand() []byte {
 	return cmd
 }
-func (o *OsdCollector) cephOSDPerfCommand() []byte {
+func (o *OSDCollector) cephOSDPerfCommand() []byte {
 	cmd, err := json.Marshal(map[string]interface{}{
 		"prefix": "osd perf",
 		"format": "json",
@@ -486,9 +488,9 @@ func (o *OsdCollector) cephOSDPerfCommand() []byte {
 	return cmd
 }
-// Describe sends the descriptors of each OsdCollector related metrics we have defined
+// Describe sends the descriptors of each OSDCollector related metrics we have defined
 // to the provided prometheus channel.
-func (o *OsdCollector) Describe(ch chan<- *prometheus.Desc) {
+func (o *OSDCollector) Describe(ch chan<- *prometheus.Desc) {
 	for _, metric := range o.collectorList() {
 		metric.Describe(ch)
 	}
@@ -497,14 +499,14 @@ func (o *OsdCollector) Describe(ch chan<- *prometheus.Desc) {
 // Collect sends all the collected metrics to the provided prometheus channel.
 // It requires the caller to handle synchronization.
-func (o *OsdCollector) Collect(ch chan<- prometheus.Metric) {
-	if err := o.collectOsdPerf(); err != nil {
-		log.Println("failed collecting cluster osd perf stats:", err)
+func (o *OSDCollector) Collect(ch chan<- prometheus.Metric) {
+	if err := o.collectOSDPerf(); err != nil {
+		log.Println("failed collecting osd perf stats:", err)
 	}
-	if err := o.collectOsdDump(); err != nil {
-		log.Println("failed collecting cluster osd dump", err)
+	if err := o.collectOSDDump(); err != nil {
+		log.Println("failed collecting osd dump:", err)
 	}
 	if err := o.collect(); err != nil {
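
To make the renamed plumbing above concrete, here is a minimal, self-contained sketch (not part of this commit) of the pattern collect() relies on: the mon command that cephOSDDFCommand marshals, and a cephOSDDF-style struct decoding a response. Only the `name`, `crush_weight`, and `depth` tags appear in the hunks above; the top-level `nodes` key and the sample JSON values are assumptions for illustration.

package main

import (
	"encoding/json"
	"fmt"
	"log"
)

// Trimmed-down version of the renamed cephOSDDF type. The "nodes" key is an
// assumption; only the inner field tags are visible in the diff above.
type cephOSDDF struct {
	OSDNodes []struct {
		Name        string      `json:"name"`
		CrushWeight json.Number `json:"crush_weight"`
		Depth       json.Number `json:"depth"`
	} `json:"nodes"`
}

func main() {
	// Same shape of mon command that cephOSDDFCommand builds and hands to MonCommand.
	cmd, err := json.Marshal(map[string]interface{}{
		"prefix": "osd df",
		"format": "json",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("mon command: %s\n", cmd)

	// Illustrative response only; a real cluster returns the output of `ceph osd df --format json`.
	buf := []byte(`{"nodes":[{"name":"osd.0","crush_weight":1.0,"depth":2}]}`)

	osdDF := &cephOSDDF{}
	if err := json.Unmarshal(buf, osdDF); err != nil {
		log.Fatal(err)
	}
	for _, node := range osdDF.OSDNodes {
		crushWeight, err := node.CrushWeight.Float64()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(node.Name, crushWeight)
	}
}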


@@ -10,7 +10,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
-func TestOsdCollector(t *testing.T) {
+func TestOSDCollector(t *testing.T) {
 	for _, tt := range []struct {
 		input string
 		regexes []*regexp.Regexp
@@ -219,7 +219,7 @@ func TestOsdCollector(t *testing.T) {
 		},
 	} {
 		func() {
-			collector := NewOsdCollector(NewNoopConn(tt.input))
+			collector := NewOSDCollector(NewNoopConn(tt.input))
 			if err := prometheus.Register(collector); err != nil {
 				t.Fatalf("collector failed to register: %s", err)
 			}


@@ -50,7 +50,7 @@ func NewCephExporter(conn *rados.Conn) *CephExporter {
 			collectors.NewPoolUsageCollector(conn),
 			collectors.NewClusterHealthCollector(conn),
 			collectors.NewMonitorCollector(conn),
-			collectors.NewOsdCollector(conn),
+			collectors.NewOSDCollector(conn),
 		},
 	}
 }
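
For reference, a hedged sketch of how the renamed constructor ends up being served, following the registration pattern of NewCephExporter above and the prometheus.Register call in the test. The import paths, the promhttp handler, and the listen address are assumptions; none of them appear in this diff.

package main

import (
	"log"
	"net/http"

	"github.com/ceph/go-ceph/rados"                    // assumed rados binding providing *rados.Conn
	"github.com/digitalocean/ceph_exporter/collectors" // assumed package path for the collectors shown above
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Connect to the cluster; NewCephExporter receives a *rados.Conn like this one.
	conn, err := rados.NewConn()
	if err != nil {
		log.Fatal(err)
	}
	if err := conn.ReadDefaultConfigFile(); err != nil {
		log.Fatal(err)
	}
	if err := conn.Connect(); err != nil {
		log.Fatal(err)
	}
	defer conn.Shutdown()

	// Register the renamed collector, the same way the exporter registers its collector list.
	if err := prometheus.Register(collectors.NewOSDCollector(conn)); err != nil {
		log.Fatalf("collector failed to register: %s", err)
	}

	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":9128", nil)) // listen address is illustrative
}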