// Copyright 2022 DigitalOcean
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
package ceph
|
|
|
|
import (
|
|
"io/ioutil"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"regexp"
|
|
"testing"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
"github.com/sirupsen/logrus"
|
|
"github.com/stretchr/testify/mock"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestClusterHealthCollector(t *testing.T) {
|
|
allVersions := []*Version{Nautilus, Octopus, Pacific}
|
|
nautilusOnly := []*Version{Nautilus}
|
|
octopusPlus := []*Version{Octopus, Pacific}
|
|
for _, tt := range []struct {
|
|
name string
|
|
versions []*Version // Defaults to allVersions if not provided.
|
|
input string
|
|
reMatch []*regexp.Regexp
|
|
}{
|
|
{
|
|
name: "15 pgs stuck degraded",
|
|
input: `
|
|
{
|
|
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "15 pgs stuck degraded"}]}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`stuck_degraded_pgs{cluster="ceph"} 15`),
|
|
},
|
|
},
|
|
{
|
|
name: "16 pgs stuck unclean",
|
|
input: `
|
|
{
|
|
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "16 pgs stuck unclean"}]}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`stuck_unclean_pgs{cluster="ceph"} 16`),
|
|
},
|
|
},
|
|
{
|
|
name: "17 pgs stuck undersized",
|
|
input: `
|
|
{
|
|
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "17 pgs stuck undersized"}]}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`stuck_undersized_pgs{cluster="ceph"} 17`),
|
|
},
|
|
},
|
|
{
|
|
name: "18 pgs stuck stale",
|
|
input: `
|
|
{
|
|
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "18 pgs stuck stale"}]}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`stuck_stale_pgs{cluster="ceph"} 18`),
|
|
},
|
|
},
|
|
{
|
|
name: "10 degraded objects",
|
|
input: `
|
|
{
|
|
"pgmap": { "degraded_objects": 10 }
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`degraded_objects{cluster="ceph"} 10`),
|
|
},
|
|
},
|
|
{
|
|
name: "20 misplaced objects",
|
|
input: `
|
|
{
|
|
"pgmap": { "misplaced_objects": 20 }
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`misplaced_objects{cluster="ceph"} 20`),
|
|
},
|
|
},
|
|
{
|
|
name: "10 down osds",
|
|
versions: nautilusOnly,
|
|
input: `
|
|
{
|
|
"osdmap": {
|
|
"osdmap": {
|
|
"num_osds": 20,
|
|
"num_up_osds": 10,
|
|
"num_in_osds": 0,
|
|
"num_remapped_pgs": 0
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osds_down{cluster="ceph"} 10`),
|
|
},
|
|
},
|
|
{
|
|
name: "normal osdmap",
|
|
versions: nautilusOnly,
|
|
input: `
|
|
{
|
|
"osdmap": {
|
|
"osdmap": {
|
|
"num_osds": 1200,
|
|
"num_up_osds": 1200,
|
|
"num_in_osds": 1190,
|
|
"num_remapped_pgs": 10
|
|
}
|
|
},
|
|
"health": {"summary": []}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osds{cluster="ceph"} 1200`),
|
|
regexp.MustCompile(`osds_up{cluster="ceph"} 1200`),
|
|
regexp.MustCompile(`osds_in{cluster="ceph"} 1190`),
|
|
regexp.MustCompile(`pgs_remapped{cluster="ceph"} 10`),
|
|
},
|
|
},
|
|
{
|
|
name: "10 down osds",
|
|
versions: octopusPlus,
|
|
input: `
|
|
{
|
|
"osdmap": {
|
|
"num_osds": 20,
|
|
"num_up_osds": 10,
|
|
"num_in_osds": 0,
|
|
"num_remapped_pgs": 0
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osds_down{cluster="ceph"} 10`),
|
|
},
|
|
},
|
|
{
|
|
name: "normal osdmap",
|
|
versions: octopusPlus,
|
|
input: `
|
|
{
|
|
"osdmap": {
|
|
"num_osds": 1200,
|
|
"num_up_osds": 1200,
|
|
"num_in_osds": 1190,
|
|
"num_remapped_pgs": 10
|
|
},
|
|
"health": {"summary": []}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osds{cluster="ceph"} 1200`),
|
|
regexp.MustCompile(`osds_up{cluster="ceph"} 1200`),
|
|
regexp.MustCompile(`osds_in{cluster="ceph"} 1190`),
|
|
regexp.MustCompile(`pgs_remapped{cluster="ceph"} 10`),
|
|
},
|
|
},
|
|
{
|
|
name: "health ok",
|
|
input: `{"health": { "status": "HEALTH_OK" } }`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`health_status{cluster="ceph"} 0`),
|
|
},
|
|
},
|
|
{
|
|
name: "health warn",
|
|
input: `{"health": { "status": "HEALTH_OK" } }`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`health_status{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 0`),
|
|
},
|
|
},
|
|
{
|
|
name: "health ok 2",
|
|
input: `{"health": { "status": "HEALTH_OK" } }`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`health_status{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 0`),
|
|
},
|
|
},
|
|
{
|
|
name: "health warn 2",
|
|
input: `{"health": { "status": "HEALTH_WARN" } }`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`health_status{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 2`),
|
|
},
|
|
},
|
|
{
|
|
name: "health err",
|
|
input: `{"health": { "status": "HEALTH_ERR" } }`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`health_status{cluster="ceph"} 2`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 3`),
|
|
},
|
|
},
|
|
{
|
|
name: "cluster statistics",
|
|
input: `
|
|
$ sudo ceph -s
|
|
cluster eff51be8-938a-4afa-b0d1-7a580b4ceb37
|
|
health HEALTH_OK
|
|
monmap e3: 3 mons at {mon01,mon02,mon03}
|
|
recovery io 5779 MB/s, 4 keys/s, 1522 objects/s
|
|
client io 4273 kB/s rd, 2740 MB/s wr, 2863 op/s
|
|
`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 5.779e`),
|
|
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 4`),
|
|
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 1522`),
|
|
regexp.MustCompile(`client_io_ops{cluster="ceph"} 2863`),
|
|
regexp.MustCompile(`client_io_read_bytes{cluster="ceph"} 4.273e`),
|
|
regexp.MustCompile(`client_io_write_bytes{cluster="ceph"} 2.74e`),
|
|
},
|
|
},
|
|
{
|
|
name: "cluster statistics 2",
|
|
input: `
|
|
$ sudo ceph -s
|
|
cluster eff51be8-938a-4afa-b0d1-7a580b4ceb37
|
|
health HEALTH_OK
|
|
monmap e3: 3 mons at {mon01,mon02,mon03}
|
|
recovery io 5779 MB/s, 4 keys/s, 1522 objects/s
|
|
client io 2863 op/s rd, 5847 op/s wr
|
|
cache io 251 MB/s flush, 6646 kB/s evict, 55 op/s promote
|
|
`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 5.779e`),
|
|
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 4`),
|
|
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 1522`),
|
|
regexp.MustCompile(`client_io_ops{cluster="ceph"} 8710`),
|
|
regexp.MustCompile(`client_io_read_ops{cluster="ceph"} 2863`),
|
|
regexp.MustCompile(`client_io_write_ops{cluster="ceph"} 5847`),
|
|
regexp.MustCompile(`cache_flush_io_bytes{cluster="ceph"} 2.51e`),
|
|
regexp.MustCompile(`cache_evict_io_bytes{cluster="ceph"} 6.646e`),
|
|
regexp.MustCompile(`cache_promote_io_ops{cluster="ceph"} 55`),
|
|
},
|
|
},
|
|
{
|
|
name: "pg statistics",
|
|
input: `
|
|
{
|
|
"pgmap": { "num_pgs": 52000, "num_objects": 13156 },
|
|
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`total_pgs{cluster="ceph"} 52000`),
|
|
regexp.MustCompile(`cluster_objects{cluster="ceph"} 13156`),
|
|
},
|
|
},
|
|
{
|
|
name: "pg states",
|
|
input: `
|
|
{
|
|
"pgmap": {
|
|
"pgs_by_state": [
|
|
{
|
|
"state_name": "active+clean+scrubbing",
|
|
"count": 2
|
|
},
|
|
{
|
|
"state_name": "active+clean+scrubbing+deep",
|
|
"count": 5
|
|
},
|
|
{
|
|
"state_name": "active+clean+inconsistent",
|
|
"count": 1
|
|
},
|
|
{
|
|
"state_name": "active+clean+repair",
|
|
"count": 1
|
|
},
|
|
{
|
|
"state_name": "active+clean+snaptrim",
|
|
"count": 15
|
|
},
|
|
{
|
|
"state_name": "active+clean+snaptrim_wait",
|
|
"count": 25
|
|
}
|
|
],
|
|
"num_pgs": 52000,
|
|
"num_objects": 13156
|
|
},
|
|
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`active_pgs{cluster="ceph"} 49`),
|
|
regexp.MustCompile(`scrubbing_pgs{cluster="ceph"} 2`),
|
|
regexp.MustCompile(`deep_scrubbing_pgs{cluster="ceph"} 5`),
|
|
regexp.MustCompile(`inconsistent_pgs{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`cluster_objects{cluster="ceph"} 13156`),
|
|
regexp.MustCompile(`snaptrim_pgs{cluster="ceph"} 15`),
|
|
regexp.MustCompile(`snaptrim_wait_pgs{cluster="ceph"} 25`),
|
|
regexp.MustCompile(`repairing_pgs{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "mon down",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"MON_DOWN": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "1/3 mons down, quorum a,b"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`mons_down{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "slow ops",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"summary": [
|
|
{
|
|
"severity": "SLOW_OPS",
|
|
"summary": "3 slow ops, oldest one blocked for 1 sec, osd.39 has slow ops"
|
|
}
|
|
]
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`slow_requests{cluster="ceph"} 3`),
|
|
},
|
|
},
|
|
{
|
|
name: "slow and blocked ops",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"SLOW_OPS": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "3 slow ops, oldest one blocked for 1 sec, osd.39 has slow ops"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`slow_requests{cluster="ceph"} 3`),
|
|
},
|
|
},
|
|
{
|
|
name: "more slow and blocked ops",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"SLOW_OPS": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "18 slow ops, oldest one blocked for 1 sec, daemons [osd.114,osd.116,osd.33,osd.34,osd.43,osd.49,osd.53] have slow ops."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`slow_requests{cluster="ceph"} 18`),
|
|
},
|
|
},
|
|
{
|
|
name: "degraded cluster",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"PG_DEGRADED": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "Degraded data redundancy: 154443937/17497658377 objects degraded (0.883%), 4886 pgs unclean, 4317 pgs degraded, 516 pgs undersized"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"pgmap": { "degraded_objects": 154443937 }
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`degraded_objects{cluster="ceph"} 1.54443937e\+08`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "crashed daemons",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"RECENT_CRASH": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "2 daemons have recently crashed"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`new_crash_reports{cluster="ceph"} 2`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "too many repaired reads",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"OSD_TOO_MANY_REPAIRS": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "Too many repaired reads on 25 OSDs"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osds_too_many_repair{cluster="ceph"} 25`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "not enabled on 1 pool",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"POOL_APP_NOT_ENABLED": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "application not enabled on 1 pool(s)"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 2`),
|
|
},
|
|
},
|
|
{
|
|
name: "many flags set",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"OSDMAP_FLAGS": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "pauserd,pausewr,noout,noin,norecover,noscrub,notieragent flag(s) set; mon 482f68d873d2 is low on available space"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osdmap_flag_full{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`osdmap_flag_pauserd{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`osdmap_flag_pausewr{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`osdmap_flag_noup{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`osdmap_flag_nodown{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`osdmap_flag_noin{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`osdmap_flag_noout{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`osdmap_flag_nobackfill{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`osdmap_flag_norecover{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`osdmap_flag_norebalance{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`osdmap_flag_noscrub{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`osdmap_flag_nodeep_scrub{cluster="ceph"} 0`),
|
|
regexp.MustCompile(`osdmap_flag_notieragent{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "many flags set new osdmap constmetrics filled",
|
|
input: `
|
|
{
|
|
"health": {
|
|
"checks": {
|
|
"OSDMAP_FLAGS": {
|
|
"severity": "HEALTH_WARN",
|
|
"summary": {
|
|
"message": "pauserd,pausewr,noout,noin,norecover,noscrub,notieragent,newhypotheticalcephflag flag(s) set; mon 482f68d873d2 is low on available space"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pauserd"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="pausewr"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noin"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noout"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="norecover"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="noscrub"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="notieragent"} 1`),
|
|
regexp.MustCompile(`osd_map_flags{cluster="ceph",flag="newhypotheticalcephflag"} 1`),
|
|
regexp.MustCompile(`health_status_interp{cluster="ceph"} 1`),
|
|
},
|
|
},
|
|
{
|
|
name: "lots of PG data",
|
|
input: `
|
|
{
|
|
"pgmap": {
|
|
"write_op_per_sec": 500,
|
|
"read_op_per_sec": 1000,
|
|
"write_bytes_sec": 829694017,
|
|
"read_bytes_sec": 941980516,
|
|
"degraded_ratio": 0.213363,
|
|
"degraded_total": 7488077,
|
|
"degraded_objects": 1597678,
|
|
"pgs_by_state": [
|
|
{
|
|
"count": 10,
|
|
"state_name": "active+undersized+degraded"
|
|
},
|
|
{
|
|
"count": 20,
|
|
"state_name": "active+clean"
|
|
},
|
|
{
|
|
"count": 10,
|
|
"state_name": "undersized+degraded+peered"
|
|
},
|
|
{
|
|
"count": 20,
|
|
"state_name": "activating+undersized+degraded"
|
|
},
|
|
{
|
|
"count": 30,
|
|
"state_name": "activating+stale+unclean"
|
|
},
|
|
{
|
|
"count": 10,
|
|
"state_name": "peering"
|
|
},
|
|
{
|
|
"count": 20,
|
|
"state_name": "scrubbing"
|
|
},
|
|
{
|
|
"count": 10,
|
|
"state_name": "scrubbing+deep"
|
|
},
|
|
{
|
|
"state_name": "remapped+recovering",
|
|
"count": 5
|
|
},
|
|
{
|
|
"state_name": "active+remapped+backfilling",
|
|
"count": 2
|
|
},
|
|
{
|
|
"state_name": "recovery_wait+inconsistent",
|
|
"count": 2
|
|
},
|
|
{
|
|
"state_name": "recovery_wait+remapped",
|
|
"count": 1
|
|
},
|
|
{
|
|
"state_name": "active+undersized+remapped+backfill_wait",
|
|
"count": 1
|
|
},
|
|
{
|
|
"state_name": "active+undersized+remapped+backfill_wait+forced_backfill",
|
|
"count": 10
|
|
},
|
|
{
|
|
"state_name": "down",
|
|
"count": 6
|
|
},
|
|
{
|
|
"state_name": "down+remapped",
|
|
"count": 31
|
|
},
|
|
{
|
|
"state_name": "active+forced_recovery+undersized",
|
|
"count": 1
|
|
},
|
|
{
|
|
"state_name": "remapped+incomplete",
|
|
"count": 2
|
|
}
|
|
],
|
|
"num_pgs": 9208,
|
|
"num_pools": 29,
|
|
"num_objects": 1315631,
|
|
"data_bytes": 1230716754607,
|
|
"recovering_objects_per_sec": 140,
|
|
"recovering_bytes_per_sec": 65536,
|
|
"recovering_keys_per_sec": 25,
|
|
"flush_bytes_sec": 59300,
|
|
"evict_bytes_sec": 3000,
|
|
"promote_op_per_sec": 1000,
|
|
"bytes_used": 1861238087680,
|
|
"bytes_avail": 2535859327381504,
|
|
"bytes_total": 2537720565469184
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`active_pgs{cluster="ceph"} 44`),
|
|
regexp.MustCompile(`degraded_pgs{cluster="ceph"} 40`),
|
|
regexp.MustCompile(`unclean_pgs{cluster="ceph"} 30`),
|
|
regexp.MustCompile(`undersized_pgs{cluster="ceph"} 52`),
|
|
regexp.MustCompile(`stale_pgs{cluster="ceph"} 30`),
|
|
regexp.MustCompile(`peering_pgs{cluster="ceph"} 10`),
|
|
regexp.MustCompile(`scrubbing_pgs{cluster="ceph"} 20`),
|
|
regexp.MustCompile(`deep_scrubbing_pgs{cluster="ceph"} 10`),
|
|
regexp.MustCompile(`recovering_pgs{cluster="ceph"} 5`),
|
|
regexp.MustCompile(`recovery_wait_pgs{cluster="ceph"} 3`),
|
|
regexp.MustCompile(`backfilling_pgs{cluster="ceph"} 2`),
|
|
regexp.MustCompile(`backfill_wait_pgs{cluster="ceph"} 11`),
|
|
regexp.MustCompile(`forced_recovery_pgs{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`forced_backfill_pgs{cluster="ceph"} 10`),
|
|
regexp.MustCompile(`down_pgs{cluster="ceph"} 37`),
|
|
regexp.MustCompile(`incomplete_pgs{cluster="ceph"} 2`),
|
|
regexp.MustCompile(`recovery_io_bytes{cluster="ceph"} 65536`),
|
|
regexp.MustCompile(`recovery_io_keys{cluster="ceph"} 25`),
|
|
regexp.MustCompile(`recovery_io_objects{cluster="ceph"} 140`),
|
|
regexp.MustCompile(`client_io_ops{cluster="ceph"} 1500`),
|
|
regexp.MustCompile(`client_io_read_ops{cluster="ceph"} 1000`),
|
|
regexp.MustCompile(`client_io_write_ops{cluster="ceph"} 500`),
|
|
regexp.MustCompile(`cache_flush_io_bytes{cluster="ceph"} 59300`),
|
|
regexp.MustCompile(`cache_evict_io_bytes{cluster="ceph"} 3000`),
|
|
regexp.MustCompile(`cache_promote_io_ops{cluster="ceph"} 1000`),
|
|
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="active"} 44`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="degraded"} 40`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="unclean"} 30`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="undersized"} 52`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="stale"} 30`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="peering"} 10`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="scrubbing"} 20`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="deep_scrubbing"} 10`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="recovering"} 5`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="recovery_wait"} 3`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="backfilling"} 2`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="backfill_wait"} 11`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="forced_recovery"} 1`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="forced_backfill"} 10`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="down"} 37`),
|
|
regexp.MustCompile(`pg_state{cluster="ceph",state="incomplete"} 2`),
|
|
},
|
|
},
|
|
{
|
|
name: "manager map",
|
|
versions: nautilusOnly,
|
|
input: `
|
|
{
|
|
"mgrmap": {
|
|
"epoch": 627,
|
|
"active_gid": 48000003,
|
|
"active_name": "mon03",
|
|
"active_addr": "10.0.0.3:6800/1746",
|
|
"available": true,
|
|
"standbys": [
|
|
{
|
|
"gid": 48000001,
|
|
"name": "mon01",
|
|
"available_modules": [
|
|
"balancer",
|
|
"dashboard",
|
|
"influx"
|
|
]
|
|
},
|
|
{
|
|
"gid": 48000002,
|
|
"name": "mon02",
|
|
"available_modules": [
|
|
"balancer",
|
|
"dashboard",
|
|
"influx"
|
|
]
|
|
}
|
|
],
|
|
"modules": [
|
|
"dashboard",
|
|
"restful",
|
|
"status"
|
|
],
|
|
"available_modules": [
|
|
"balancer",
|
|
"dashboard",
|
|
"influx"
|
|
],
|
|
"services": {
|
|
"dashboard": "http://mon01:7000/"
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`mgrs_active{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`mgrs{cluster="ceph"} 3`),
|
|
},
|
|
},
|
|
{
|
|
name: "manager map",
|
|
versions: octopusPlus,
|
|
input: `
|
|
{
|
|
"mgrmap": {
|
|
"available": true,
|
|
"num_standbys": 2,
|
|
"modules": [
|
|
"iostat",
|
|
"pg_autoscaler",
|
|
"prometheus",
|
|
"restful"
|
|
],
|
|
"services": {
|
|
"prometheus": "http://10.117.16.32:9283/",
|
|
"restful": "https://10.117.16.32:8003/"
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`mgrs_active{cluster="ceph"} 1`),
|
|
regexp.MustCompile(`mgrs{cluster="ceph"} 3`),
|
|
},
|
|
},
|
|
{
|
|
name: "service map (rbd-mirror)",
|
|
input: `
|
|
{
|
|
"servicemap": {
|
|
"epoch": 30,
|
|
"modified": "2020-07-13 22:21:53.278589",
|
|
"services": {
|
|
"rbd-mirror": {
|
|
"daemons": {
|
|
"summary": "",
|
|
"681363": {
|
|
"start_epoch": 4,
|
|
"start_stamp": "2020-09-24 04:27:12.310285",
|
|
"addr": "10.39.70.112:0/2856123533",
|
|
"metadata": {
|
|
"arch": "x86_64",
|
|
"id": "prod-mon01-block01"
|
|
}
|
|
},
|
|
|
|
"681474": {
|
|
"start_epoch": 6,
|
|
"start_stamp": "2020-09-24 04:28:44.500861",
|
|
"addr": "10.39.70.111:0/3132809711",
|
|
"metadata": {
|
|
"arch": "x86_64",
|
|
"id": "prod-mon02-block01"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}`,
|
|
reMatch: []*regexp.Regexp{
|
|
regexp.MustCompile(`rbd_mirror_up{cluster="ceph",\s*name="prod-mon01-block01"} 1`),
|
|
regexp.MustCompile(`rbd_mirror_up{cluster="ceph",\s*name="prod-mon02-block01"} 1`),
|
|
},
|
|
},
|
|
} {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
versions := allVersions
|
|
if len(tt.versions) > 0 {
|
|
versions = tt.versions
|
|
}
|
|
for _, version := range versions {
|
|
t.Run(version.String(), func(t *testing.T) {
|
|
conn := &MockConn{}
|
|
conn.On("MonCommand", mock.Anything).Return(
|
|
[]byte(tt.input), "", nil,
|
|
)
|
|
|
|
collector := NewClusterHealthCollector(&Exporter{Conn: conn, Cluster: "ceph", Logger: logrus.New(), Version: version})
|
|
err := prometheus.Register(collector)
|
|
require.NoError(t, err)
|
|
defer prometheus.Unregister(collector)
|
|
|
|
server := httptest.NewServer(promhttp.Handler())
|
|
defer server.Close()
|
|
|
|
resp, err := http.Get(server.URL)
|
|
require.NoError(t, err)
|
|
defer resp.Body.Close()
|
|
|
|
buf, err := ioutil.ReadAll(resp.Body)
|
|
require.NoError(t, err)
|
|
|
|
for _, re := range tt.reMatch {
|
|
if !re.Match(buf) {
|
|
t.Errorf("expected %s to match\n", re.String())
|
|
}
|
|
}
|
|
})
|
|
}
|
|
})
|
|
}
|
|
}
|