osd: fix parsing of invalid JSON produced by Ceph after 10.2.5 when an OSD is out
Due to http://tracker.ceph.com/issues/18239, Ceph generates invalid JSON output when any OSD is out of the cluster. Because the output is invalid, Go is unable to unmarshal it, which can result in confusing metrics. Replacing -nan with 0 is admittedly only a workaround, but it prevents wrong metrics for the time being. Fixes: https://github.com/digitalocean/ceph_exporter/issues/48
This commit is contained in:
parent
470908e239
commit
74439767fb
|
@ -4,6 +4,7 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"bytes"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
@ -316,6 +317,9 @@ func (o *OSDCollector) collect() error {
|
|||
return err
|
||||
}
|
||||
|
||||
// Workaround: Ceph Jewel after 10.2.5 emits invalid JSON (-nan values) when an OSD is out.
|
||||
buf = bytes.Replace(buf, []byte("-nan"), []byte("0"), -1)
|
||||
|
||||
osdDF := &cephOSDDF{}
|
||||
if err := json.Unmarshal(buf, osdDF); err != nil {
|
||||
return err
|
||||
|
|
|
@ -78,6 +78,21 @@ func TestOSDCollector(t *testing.T) {
|
|||
"utilization": 0.329892,
|
||||
"var": 0.950614,
|
||||
"pgs": 164
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "osd.4",
|
||||
"type": "osd",
|
||||
"type_id": 0,
|
||||
"crush_weight": 0.010391,
|
||||
"depth": 2,
|
||||
"reweight": 0,
|
||||
"kb": 0,
|
||||
"kb_used": 0,
|
||||
"kb_avail": 0,
|
||||
"utilization": -nan,
|
||||
"var": -nan,
|
||||
"pgs": 0
|
||||
}
|
||||
],
|
||||
"stray": [],
|
||||
|
@ -96,38 +111,47 @@ func TestOSDCollector(t *testing.T) {
|
|||
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.1"} 0.010391`),
|
||||
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.2"} 0.010391`),
|
||||
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.3"} 0.010391`),
|
||||
regexp.MustCompile(`ceph_osd_crush_weight{cluster="ceph",osd="osd.4"} 0.010391`),
|
||||
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.0"} 2`),
|
||||
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.1"} 2`),
|
||||
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.2"} 2`),
|
||||
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.3"} 2`),
|
||||
regexp.MustCompile(`ceph_osd_depth{cluster="ceph",osd="osd.4"} 2`),
|
||||
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.0"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.1"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.2"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.3"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_reweight{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.0"} 1.1150316e`),
|
||||
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.1"} 1.1150316e`),
|
||||
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.2"} 1.1150316e`),
|
||||
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.3"} 1.1150316e`),
|
||||
regexp.MustCompile(`ceph_osd_bytes{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.0"} 4.0772e`),
|
||||
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.1"} 4.0512e`),
|
||||
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.2"} 3.6712e`),
|
||||
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.3"} 3.6784e`),
|
||||
regexp.MustCompile(`ceph_osd_used_bytes{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.0"} 1.1109544e`),
|
||||
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.1"} 1.1109804e`),
|
||||
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.2"} 1.1113604e`),
|
||||
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.3"} 1.1113532e`),
|
||||
regexp.MustCompile(`ceph_osd_avail_bytes{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.0"} 0.365658`),
|
||||
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.1"} 0.363326`),
|
||||
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.2"} 0.329246`),
|
||||
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.3"} 0.329892`),
|
||||
regexp.MustCompile(`ceph_osd_utilization{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.0"} 1.053676`),
|
||||
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.1"} 1.046957`),
|
||||
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.2"} 0.948753`),
|
||||
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.3"} 0.950614`),
|
||||
regexp.MustCompile(`ceph_osd_variance{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.0"} 283`),
|
||||
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.1"} 279`),
|
||||
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.2"} 162`),
|
||||
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.3"} 164`),
|
||||
regexp.MustCompile(`ceph_osd_pgs{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_total_bytes{cluster="ceph"} 4.4601264e`),
|
||||
regexp.MustCompile(`ceph_osd_total_used_bytes{cluster="ceph"} 1.5478e`),
|
||||
regexp.MustCompile(`ceph_osd_total_avail_bytes{cluster="ceph"} 4.4446484e`),
|
||||
|
@ -138,6 +162,13 @@ func TestOSDCollector(t *testing.T) {
|
|||
input: `
|
||||
{
|
||||
"osd_perf_infos": [
|
||||
{
|
||||
"id": 4,
|
||||
"perf_stats": {
|
||||
"commit_latency_ms": 0,
|
||||
"apply_latency_ms": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"perf_stats": {
|
||||
|
@ -173,10 +204,12 @@ func TestOSDCollector(t *testing.T) {
|
|||
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.1"} 0.002`),
|
||||
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.2"} 0.002`),
|
||||
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.3"} 0.001`),
|
||||
regexp.MustCompile(`ceph_osd_perf_commit_latency_seconds{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.0"} 0.031`),
|
||||
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.1"} 0.039`),
|
||||
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.2"} 0.079`),
|
||||
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.3"} 0.064`),
|
||||
regexp.MustCompile(`ceph_osd_perf_apply_latency_seconds{cluster="ceph",osd="osd.4"} 0`),
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -206,6 +239,12 @@ func TestOSDCollector(t *testing.T) {
|
|||
"uuid": "bef98b10",
|
||||
"up": 1,
|
||||
"in": 1
|
||||
},
|
||||
{
|
||||
"osd": 4,
|
||||
"uuid": "5936c9e8",
|
||||
"up": 0,
|
||||
"in": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -215,10 +254,12 @@ func TestOSDCollector(t *testing.T) {
|
|||
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.1"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.2"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.3"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_in{cluster="ceph",osd="osd.4"} 0`),
|
||||
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.0"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.1"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.2"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.3"} 1`),
|
||||
regexp.MustCompile(`ceph_osd_up{cluster="ceph",osd="osd.4"} 0`),
|
||||
},
|
||||
},
|
||||
} {
|
||||
|
|
Loading…
Reference in New Issue