mirror of
https://github.com/digitalocean/ceph_exporter
synced 2025-02-12 17:37:23 +00:00
monitors: add back clock skew and latency metric support
This commit is contained in:
parent
9a39cc64ed
commit
6f290751c9
@ -210,6 +210,14 @@ func (m *MonitorCollector) metricsList() []prometheus.Metric {
|
||||
}
|
||||
}
|
||||
|
||||
// cephTimeSyncStatus is the decoded form of a `time-sync-status` monitor
// command response. Only the "time_skew_status" object is captured; its keys
// become the keys of TimeChecks.
type cephTimeSyncStatus struct {
	TimeChecks map[string]cephTimeSyncEntry `json:"time_skew_status"`
}

// cephTimeSyncEntry is one value of the "time_skew_status" object.
type cephTimeSyncEntry struct {
	// Health is taken verbatim from the "health" key.
	Health string `json:"health"`

	// Latency and Skew are kept as json.Number so the conversion to a
	// float64 (and any resulting error) happens explicitly where the value
	// is used.
	Latency json.Number `json:"latency"`
	Skew    json.Number `json:"skew"`
}
|
||||
|
||||
type cephMonitorStats struct {
|
||||
Health struct {
|
||||
Health struct {
|
||||
@ -252,6 +260,17 @@ func (m *MonitorCollector) collect() error {
|
||||
return err
|
||||
}
|
||||
|
||||
cmd = m.cephTimeSyncStatusCommand()
|
||||
buf, _, err = m.conn.MonCommand(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
timeStats := &cephTimeSyncStatus{}
|
||||
if err := json.Unmarshal(buf, timeStats); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, healthService := range stats.Health.Health.HealthServices {
|
||||
for _, monstat := range healthService.Mons {
|
||||
kbTotal, err := monstat.KBTotal.Float64()
|
||||
@ -318,6 +337,20 @@ func (m *MonitorCollector) collect() error {
|
||||
m.Latency.WithLabelValues(monstat.Name).Set(latency)
|
||||
}
|
||||
|
||||
for monNode, tstat := range timeStats.TimeChecks {
|
||||
skew, err := tstat.Skew.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.ClockSkew.WithLabelValues(monNode).Set(skew)
|
||||
|
||||
latency, err := tstat.Latency.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Latency.WithLabelValues(monNode).Set(latency)
|
||||
}
|
||||
|
||||
m.NodesinQuorum.Set(float64(len(stats.Quorum)))
|
||||
|
||||
return nil
|
||||
@ -336,6 +369,19 @@ func (m *MonitorCollector) cephUsageCommand() []byte {
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (m *MonitorCollector) cephTimeSyncStatusCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "time-sync-status",
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
// panic! because ideally in no world this hard-coded input
|
||||
// should fail.
|
||||
panic(err)
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
// Describe sends the descriptors of each Monitor related metric we have defined
|
||||
// to the channel provided.
|
||||
func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
|
@ -290,3 +290,124 @@ func TestMonitorCollector(t *testing.T) {
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorTimeSyncStats(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
input string
|
||||
regexes []*regexp.Regexp
|
||||
}{
|
||||
{`
|
||||
{
|
||||
"time_skew_status": {
|
||||
"test-mon01": {
|
||||
"skew": 0.000022,
|
||||
"latency": 0.000677,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
"test-mon02": {
|
||||
"skew": 0.001051,
|
||||
"latency": 0.000682,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
"test-mon03": {
|
||||
"skew": 0.003029,
|
||||
"latency": 0.000582,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
"test-mon04": {
|
||||
"skew": 0.000330,
|
||||
"latency": 0.000667,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
"test-mon05": {
|
||||
"skew": 0.003682,
|
||||
"latency": 0.000667,
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
},
|
||||
"timechecks": {
|
||||
"epoch": 84,
|
||||
"round": 69600,
|
||||
"round_status": "finished"
|
||||
}
|
||||
}
|
||||
`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon01"} 2.2e\-05`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon02"} 0.001051`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon03"} 0.003029`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon04"} 0.00033`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon05"} 0.003682`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon01"} 0.000677`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon02"} 0.000682`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon03"} 0.000582`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon04"} 0.000667`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon05"} 0.000667`),
|
||||
},
|
||||
},
|
||||
{`
|
||||
{
|
||||
"time_skew_status": {
|
||||
"test-mon01": {
|
||||
"skew": "wrong!",
|
||||
"latency": 0.000677,
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
}
|
||||
`,
|
||||
[]*regexp.Regexp{},
|
||||
},
|
||||
{`
|
||||
{
|
||||
"time_skew_status": {
|
||||
"test-mon01": {
|
||||
"skew": 0.000334,
|
||||
"latency": "wrong!",
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
}
|
||||
`,
|
||||
[]*regexp.Regexp{},
|
||||
},
|
||||
{`
|
||||
{
|
||||
"time_skew_status": {
|
||||
"test-mon01": {
|
||||
"skew"::: "0.000334",
|
||||
"latency"::: "0.000677",
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
}
|
||||
`,
|
||||
[]*regexp.Regexp{},
|
||||
},
|
||||
} {
|
||||
func() {
|
||||
collector := NewMonitorCollector(NewNoopConn(tt.input), "ceph")
|
||||
if err := prometheus.Register(collector); err != nil {
|
||||
t.Fatalf("collector failed to register: %s", err)
|
||||
}
|
||||
defer prometheus.Unregister(collector)
|
||||
|
||||
server := httptest.NewServer(prometheus.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected failed response from prometheus: %s", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
buf, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed reading server response: %s", err)
|
||||
}
|
||||
|
||||
for _, re := range tt.regexes {
|
||||
if !re.Match(buf) {
|
||||
t.Errorf("failed matching: %q", re)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user