osd: fix parsing of invalid JSON produced by Ceph after 10.2.5 when an OSD is out

Due to http://tracker.ceph.com/issues/18239, Ceph generates invalid
JSON output when any OSD is out of the cluster. Because the output is
invalid, Go is unable to unmarshal the JSON, which can result in
confusing metrics. Replacing -nan with 0 is admittedly only a workaround,
but it prevents wrong metrics for the time being.

Fixes: https://github.com/digitalocean/ceph_exporter/issues/48
This commit is contained in:
craigchi 2017-04-21 09:32:37 +08:00
parent 470908e239
commit 74439767fb
2 changed files with 45 additions and 0 deletions

View File

@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"log"
"bytes"
"github.com/prometheus/client_golang/prometheus"
)
@ -316,6 +317,9 @@ func (o *OSDCollector) collect() error {
return err
}
// Workaround: Ceph Jewel after 10.2.5 produces invalid JSON (-nan values) when an OSD is out.
buf = bytes.Replace(buf, []byte("-nan"), []byte("0"), -1)
osdDF := &cephOSDDF{}
if err := json.Unmarshal(buf, osdDF); err != nil {
return err

View File

@ -78,6 +78,21 @@ func TestOSDCollector(t *testing.T) {
"utilization": 0.329892,
"var": 0.950614,
"pgs": 164
},
{
"id": 4,
"name": "osd.4",
"type": "osd",
"type_id": 0,
"crush_weight": 0.010391,
"depth": 2,
"reweight": 0,
"kb": 0,
"kb_used": 0,
"kb_avail": 0,
"utilization": -nan,
"var": -nan,
"pgs": 0
}
],
"stray": [],
@ -96,38 +111,47 @@ func TestOSDCollector(t *testing.T) {
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.1"} 0.010391`),
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.2"} 0.010391`),
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.3"} 0.010391`),
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.4"} 0.010391`),
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.0"} 2`),
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.1"} 2`),
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.2"} 2`),
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.3"} 2`),
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.4"} 2`),
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.0"} 1`),
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.1"} 1`),
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.2"} 1`),
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.3"} 1`),
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.0"} 1.1150316e`),
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.1"} 1.1150316e`),
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.2"} 1.1150316e`),
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.3"} 1.1150316e`),
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.0"} 4.0772e`),
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.1"} 4.0512e`),
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.2"} 3.6712e`),
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.3"} 3.6784e`),
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.0"} 1.1109544e`),
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.1"} 1.1109804e`),
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.2"} 1.1113604e`),
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.3"} 1.1113532e`),
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.0"} 0.365658`),
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.1"} 0.363326`),
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.2"} 0.329246`),
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.3"} 0.329892`),
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.0"} 1.053676`),
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.1"} 1.046957`),
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.2"} 0.948753`),
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.3"} 0.950614`),
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.0"} 283`),
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.1"} 279`),
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.2"} 162`),
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.3"} 164`),
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_total_bytes{cluster="ceph"} 4.4601264e`),
regexp.MustCompile(`ceph_osd_total_used_bytes{cluster="ceph"} 1.5478e`),
regexp.MustCompile(`ceph_osd_total_avail_bytes{cluster="ceph"} 4.4446484e`),
@ -138,6 +162,13 @@ func TestOSDCollector(t *testing.T) {
input: `
{
"osd_perf_infos": [
{
"id": 4,
"perf_stats": {
"commit_latency_ms": 0,
"apply_latency_ms": 0
}
},
{
"id": 3,
"perf_stats": {
@ -173,10 +204,12 @@ func TestOSDCollector(t *testing.T) {
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.1"} 0.002`),
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.2"} 0.002`),
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.3"} 0.001`),
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.0"} 0.031`),
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.1"} 0.039`),
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.2"} 0.079`),
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.3"} 0.064`),
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.4"} 0`),
},
},
{
@ -206,6 +239,12 @@ func TestOSDCollector(t *testing.T) {
"uuid": "bef98b10",
"up": 1,
"in": 1
},
{
"osd": 4,
"uuid": "5936c9e8",
"up": 0,
"in": 0
}
]
}
@ -215,10 +254,12 @@ func TestOSDCollector(t *testing.T) {
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.1"} 1`),
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.2"} 1`),
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.3"} 1`),
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.4"} 0`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.0"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.1"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.2"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.3"} 1`),
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.4"} 0`),
},
},
} {