monitors: add back clock skew and latency metric support

This commit is contained in:
Vaibhav Bhembre 2018-06-23 18:51:06 -04:00
parent 9a39cc64ed
commit 6f290751c9
2 changed files with 167 additions and 0 deletions

View File

@ -210,6 +210,14 @@ func (m *MonitorCollector) metricsList() []prometheus.Metric {
}
}
// cephTimeSyncStatus holds the decoded JSON reply of the monitor
// "time-sync-status" command.
type cephTimeSyncStatus struct {
	// TimeChecks is decoded from the "time_skew_status" object and is keyed
	// by that object's member names; collect uses each key as the monitor
	// label of the exported metrics.
	TimeChecks map[string]struct {
		// Skew and Latency are kept as json.Number so that the numeric
		// conversion (and its error handling) happens in the caller.
		Skew    json.Number `json:"skew"`
		Latency json.Number `json:"latency"`
		// Health is the raw "health" string reported for the entry.
		Health string `json:"health"`
	} `json:"time_skew_status"`
}
type cephMonitorStats struct {
Health struct {
Health struct {
@ -252,6 +260,17 @@ func (m *MonitorCollector) collect() error {
return err
}
cmd = m.cephTimeSyncStatusCommand()
buf, _, err = m.conn.MonCommand(cmd)
if err != nil {
return err
}
timeStats := &cephTimeSyncStatus{}
if err := json.Unmarshal(buf, timeStats); err != nil {
return err
}
for _, healthService := range stats.Health.Health.HealthServices {
for _, monstat := range healthService.Mons {
kbTotal, err := monstat.KBTotal.Float64()
@ -318,6 +337,20 @@ func (m *MonitorCollector) collect() error {
m.Latency.WithLabelValues(monstat.Name).Set(latency)
}
for monNode, tstat := range timeStats.TimeChecks {
skew, err := tstat.Skew.Float64()
if err != nil {
return err
}
m.ClockSkew.WithLabelValues(monNode).Set(skew)
latency, err := tstat.Latency.Float64()
if err != nil {
return err
}
m.Latency.WithLabelValues(monNode).Set(latency)
}
m.NodesinQuorum.Set(float64(len(stats.Quorum)))
return nil
@ -336,6 +369,19 @@ func (m *MonitorCollector) cephUsageCommand() []byte {
return cmd
}
// cephTimeSyncStatusCommand returns the JSON-encoded monitor command that
// asks for "time-sync-status" with its output formatted as JSON.
func (m *MonitorCollector) cephTimeSyncStatusCommand() []byte {
	request := map[string]interface{}{
		"prefix": "time-sync-status",
		"format": "json",
	}

	encoded, marshalErr := json.Marshal(request)
	if marshalErr == nil {
		return encoded
	}

	// The request is a fixed literal, so a marshalling failure can only be a
	// programming error; panic rather than hand back an unusable command.
	panic(marshalErr)
}
// Describe sends the descriptors of each Monitor-related metric we have defined
// to the channel provided.
func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {

View File

@ -290,3 +290,124 @@ func TestMonitorCollector(t *testing.T) {
}()
}
}
// TestMonitorTimeSyncStats feeds canned "time-sync-status" replies to a
// MonitorCollector through NewNoopConn, scrapes the default prometheus
// handler over HTTP and matches the response body against the regexes of
// each case.
//
// Cases with an empty regex list assert nothing about the exposed metrics;
// they only verify that registering and scraping a collector that was handed
// unusable input still succeeds.
func TestMonitorTimeSyncStats(t *testing.T) {
	for _, tt := range []struct {
		input   string
		regexes []*regexp.Regexp
	}{
		// Well-formed reply: every monitor must be exported with its skew
		// and latency.
		{`
{
    "time_skew_status": {
        "test-mon01": {
            "skew": 0.000022,
            "latency": 0.000677,
            "health": "HEALTH_OK"
        },
        "test-mon02": {
            "skew": 0.001051,
            "latency": 0.000682,
            "health": "HEALTH_OK"
        },
        "test-mon03": {
            "skew": 0.003029,
            "latency": 0.000582,
            "health": "HEALTH_OK"
        },
        "test-mon04": {
            "skew": 0.000330,
            "latency": 0.000667,
            "health": "HEALTH_OK"
        },
        "test-mon05": {
            "skew": 0.003682,
            "latency": 0.000667,
            "health": "HEALTH_OK"
        }
    },
    "timechecks": {
        "epoch": 84,
        "round": 69600,
        "round_status": "finished"
    }
}
`,
			[]*regexp.Regexp{
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon01"} 2.2e\-05`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon02"} 0.001051`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon03"} 0.003029`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon04"} 0.00033`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{cluster="ceph",monitor="test-mon05"} 0.003682`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon01"} 0.000677`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon02"} 0.000682`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon03"} 0.000582`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon04"} 0.000667`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{cluster="ceph",monitor="test-mon05"} 0.000667`),
			},
		},
		// Non-numeric skew. The document is syntactically complete so that
		// the bad value, not a truncated object, is what the collector has
		// to cope with.
		{`
{
    "time_skew_status": {
        "test-mon01": {
            "skew": "wrong!",
            "latency": 0.000677,
            "health": "HEALTH_OK"
        }
    }
}
`,
			[]*regexp.Regexp{},
		},
		// Non-numeric latency, again inside an otherwise valid document.
		{`
{
    "time_skew_status": {
        "test-mon01": {
            "skew": 0.000334,
            "latency": "wrong!",
            "health": "HEALTH_OK"
        }
    }
}
`,
			[]*regexp.Regexp{},
		},
		// Malformed JSON: the ":::" separators are the intended syntax
		// error, everything else is balanced.
		{`
{
    "time_skew_status": {
        "test-mon01": {
            "skew"::: "0.000334",
            "latency"::: "0.000677",
            "health": "HEALTH_OK"
        }
    }
}
`,
			[]*regexp.Regexp{},
		},
	} {
		// Run each case in its own function so the deferred unregister and
		// server shutdown happen before the next case registers a collector.
		func() {
			collector := NewMonitorCollector(NewNoopConn(tt.input), "ceph")
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)

			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()

			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()

			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}

			for _, re := range tt.regexes {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
		}()
	}
}