osd: Add new collector for osd metadata

Scrape created_at, ceph_version_when_created, and osd_objectstore
from the output of `ceph osd metadata`.
This commit is contained in:
Tyler Brekke 2023-08-24 17:33:10 -07:00
parent 2df38cb776
commit 3c403081b5
3 changed files with 117 additions and 2 deletions

View File

@ -70,7 +70,7 @@ func NewExporter(conn Conn, cluster string, config string, user string, rgwMode
func (exporter *Exporter) initCollectors() map[string]versionedCollector {
standardCollectors := map[string]versionedCollector{
"clusterUage": NewClusterUsageCollector(exporter),
"clusterUsage": NewClusterUsageCollector(exporter),
"poolUsage": NewPoolUsageCollector(exporter),
"poolInfo": NewPoolInfoCollector(exporter),
"clusterHealth": NewClusterHealthCollector(exporter),

View File

@ -19,6 +19,7 @@ import (
"encoding/json"
"fmt"
"math"
"strconv"
"strings"
"sync"
"time"
@ -99,6 +100,9 @@ type OSDCollector struct {
// OSDUp displays the Up state of the OSD
OSDUp *prometheus.GaugeVec
// OSDMetadata displays metadata of an OSD
OSDMetadata *prometheus.GaugeVec
// OSDFullRatio displays current full_ratio of OSD
OSDFullRatio prometheus.Gauge
@ -155,6 +159,7 @@ func NewOSDCollector(exporter *Exporter) *OSDCollector {
labels := make(prometheus.Labels)
labels["cluster"] = exporter.Cluster
osdLabels := []string{"osd", "device_class", "host", "rack", "root"}
osdMetadataLabels := []string{"osd", "objectstore", "ceph_version_when_created", "created_at"}
o := &OSDCollector{
conn: exporter.Conn,
@ -395,6 +400,16 @@ func NewOSDCollector(exporter *Exporter) *OSDCollector {
osdLabels,
),
OSDMetadata: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "osd_metadata",
Help: "OSD Metadata",
ConstLabels: labels,
},
osdMetadataLabels,
),
OSDDownDesc: prometheus.NewDesc(
fmt.Sprintf("%s_osd_down", cephNamespace),
"Number of OSDs down in the cluster",
@ -460,6 +475,7 @@ func (o *OSDCollector) collectorList() []prometheus.Collector {
o.ApplyLatency,
o.OSDIn,
o.OSDUp,
o.OSDMetadata,
o.OSDFullRatio,
o.OSDNearFullRatio,
o.OSDBackfillFullRatio,
@ -582,6 +598,13 @@ type cephOSDLabel struct {
parent int64 // parent id when building tables
}
// cephOSDMetadata is the decode target for one element of the JSON array
// that collectOSDMetadata unmarshals. Only the keys tagged below are
// captured; encoding/json ignores any other keys in the element.
type cephOSDMetadata struct {
	// ID is read from the "id" key.
	ID int `json:"id"`
	// OsdObjectstore is read from the "osd_objectstore" key.
	OsdObjectstore string `json:"osd_objectstore"`
	// CephVersionWhenCreated is read from the "ceph_version_when_created" key.
	CephVersionWhenCreated string `json:"ceph_version_when_created"`
	// CreatedAt is read from the "created_at" key and kept as the raw string.
	CreatedAt string `json:"created_at"`
}
func (o *OSDCollector) collectOSDDF() error {
args := o.cephOSDDFCommand()
buf, _, err := o.conn.MgrCommand(args)
@ -701,6 +724,29 @@ func (o *OSDCollector) collectOSDDF() error {
}
// collectOSDMetadata sends the command built by cephOSDMetadataCommand
// through the connection's MonCommand, decodes the reply as a JSON array of
// cephOSDMetadata, and sets one OSDMetadata sample to 1 for every element.
//
// The label values are, in order: the decimal OSD id, the objectstore, the
// Ceph version at creation time, and the creation timestamp.
//
// A command failure is logged here and returned; a JSON decode failure is
// returned without being logged by this method.
func (o *OSDCollector) collectOSDMetadata() error {
	request := o.cephOSDMetadataCommand()

	reply, _, err := o.conn.MonCommand(request)
	if err != nil {
		entry := o.logger.WithError(err).WithField("args", string(request))
		entry.Error("error executing mon command")
		return err
	}

	var entries []cephOSDMetadata
	err = json.Unmarshal(reply, &entries)
	if err != nil {
		return err
	}

	for i := range entries {
		md := &entries[i]
		labelValues := []string{
			strconv.Itoa(md.ID),
			md.OsdObjectstore,
			md.CephVersionWhenCreated,
			md.CreatedAt,
		}
		// The sample value is a constant 1; the information is in the labels.
		o.OSDMetadata.WithLabelValues(labelValues...).Set(1)
	}

	return nil
}
func (o *OSDCollector) collectOSDPerf() error {
args := o.cephOSDPerfCommand()
buf, _, err := o.conn.MgrCommand(args)
@ -1047,6 +1093,17 @@ func (o *OSDCollector) cephOSDPerfCommand() [][]byte {
return [][]byte{cmd}
}
// cephOSDMetadataCommand returns the JSON-encoded command payload with
// prefix "osd metadata" and the format set to jsonFormat.
//
// If encoding the payload fails, the error is reported through the logger's
// Panic call.
func (o *OSDCollector) cephOSDMetadataCommand() []byte {
	payload := map[string]interface{}{
		"prefix": "osd metadata",
		"format": jsonFormat,
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		o.logger.WithError(err).Panic("error marshalling ceph osd metadata")
	}

	return encoded
}
func (o *OSDCollector) cephOSDTreeCommand(states ...string) []byte {
req := map[string]interface{}{
"prefix": "osd tree",
@ -1131,7 +1188,7 @@ func (o *OSDCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided Prometheus channel.
// It requires the caller to handle synchronization.
func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
// Reset daemon specifc metrics; daemons can leave the cluster
// Reset daemon specific metrics; daemons can leave the cluster
o.CrushWeight.Reset()
o.Depth.Reset()
o.Reweight.Reset()
@ -1145,6 +1202,7 @@ func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
o.ApplyLatency.Reset()
o.OSDIn.Reset()
o.OSDUp.Reset()
o.OSDMetadata.Reset()
o.buildOSDLabelCache()
localWg := &sync.WaitGroup{}
@ -1157,6 +1215,14 @@ func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
}
}()
localWg.Add(1)
go func() {
defer localWg.Done()
if err := o.collectOSDMetadata(); err != nil {
o.logger.WithError(err).Error("error collecting OSD metadata metrics")
}
}()
localWg.Add(1)
go func() {
defer localWg.Done()

View File

@ -414,6 +414,11 @@ func TestOSDCollector(t *testing.T) {
regexp.MustCompile(`ceph_osd_in{cluster="ceph",device_class="ssd",host="prod-data01-block01",osd="osd.2",rack="A8R1",root="default"} 1`),
regexp.MustCompile(`ceph_osd_in{cluster="ceph",device_class="ssd",host="prod-data01-block01",osd="osd.3",rack="A8R1",root="default"} 1`),
regexp.MustCompile(`ceph_osd_in{cluster="ceph",device_class="ssd",host="prod-data01-block01",osd="osd.4",rack="A8R1",root="default"} 0`),
regexp.MustCompile(`ceph_osd_metadata{ceph_version_when_created="ceph version 16.2.11-119-g6e981ce \(6e981ceb1084ad7628ea32a6a0a23ce09bc5cf8b\) pacific \(stable\)",cluster="ceph",created_at="2023-03-24T20:25:57.763728Z",objectstore="bluestore",osd="0"} 1`),
regexp.MustCompile(`ceph_osd_metadata{ceph_version_when_created="",cluster="ceph",created_at="",objectstore="filestore",osd="1"} 1`),
regexp.MustCompile(`ceph_osd_metadata{ceph_version_when_created="ceph version 16.2.11-119-g6e981ce \(6e981ceb1084ad7628ea32a6a0a23ce09bc5cf8b\) pacific \(stable\)",cluster="ceph",created_at="2023-03-24T20:25:57.763728Z",objectstore="bluestore",osd="2"} 1`),
regexp.MustCompile(`ceph_osd_metadata{ceph_version_when_created="",cluster="ceph",created_at="",objectstore="filestore",osd="3"} 1`),
regexp.MustCompile(`ceph_osd_metadata{ceph_version_when_created="",cluster="ceph",created_at="",objectstore="filestore",osd="4"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",device_class="hdd",host="prod-data01-block01",osd="osd.0",rack="A8R1",root="default"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",device_class="ssd",host="prod-data01-block01",osd="osd.1",rack="A8R1",root="default"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",device_class="ssd",host="prod-data01-block01",osd="osd.2",rack="A8R1",root="default"} 1`),
@ -810,6 +815,50 @@ func TestOSDCollector(t *testing.T) {
]
}`), "", nil)
conn.On("MonCommand", mock.MatchedBy(func(in interface{}) bool {
v := map[string]interface{}{}
err := json.Unmarshal(in.([]byte), &v)
require.NoError(t, err)
return cmp.Equal(v, map[string]interface{}{
"prefix": "osd metadata",
"format": "json",
})
})).Return([]byte(`
[
{
"id": 0,
"osd_objectstore": "bluestore",
"ceph_version_when_created": "ceph version 16.2.11-119-g6e981ce (6e981ceb1084ad7628ea32a6a0a23ce09bc5cf8b) pacific (stable)",
"created_at": "2023-03-24T20:25:57.763728Z"
},
{
"id": 1,
"osd_objectstore": "filestore",
"ceph_version_when_created": "",
"created_at": ""
},
{
"id": 2,
"osd_objectstore": "bluestore",
"ceph_version_when_created": "ceph version 16.2.11-119-g6e981ce (6e981ceb1084ad7628ea32a6a0a23ce09bc5cf8b) pacific (stable)",
"created_at": "2023-03-24T20:25:57.763728Z"
},
{
"id": 3,
"osd_objectstore": "filestore",
"ceph_version_when_created": "",
"created_at": ""
},
{
"id": 4,
"osd_objectstore": "filestore",
"ceph_version_when_created": "",
"created_at": ""
}
]`), "", nil)
conn.On("MgrCommand", mock.MatchedBy(func(in interface{}) bool {
v := map[string]interface{}{}