Use JSON output from `ceph crash ls` instead of plain output
This commit is contained in:
parent
458b79492c
commit
c34a208ff6
|
@ -15,19 +15,14 @@
|
|||
package ceph
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var (
|
||||
crashLsLineRegex = regexp.MustCompile(`.*_[0-9a-f-]{36}\s+(\S+)\s*(\*)?`)
|
||||
|
||||
statusNames = map[bool]string{true: "new", false: "archived"}
|
||||
)
|
||||
|
||||
|
@ -56,7 +51,7 @@ func NewCrashesCollector(exporter *Exporter) *CrashesCollector {
|
|||
crashReportsDesc: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_crash_reports", cephNamespace),
|
||||
"Count of crashes reports per daemon, according to `ceph crash ls`",
|
||||
[]string{"daemon", "status"},
|
||||
[]string{"entity", "status"},
|
||||
labels,
|
||||
),
|
||||
}
|
||||
|
@ -69,6 +64,11 @@ type crashEntry struct {
|
|||
isNew bool
|
||||
}
|
||||
|
||||
// cephCrashLs holds the fields decoded from one element of the JSON array
// returned by `crash ls`. Keys without a matching field here are ignored by
// encoding/json during decoding.
type cephCrashLs struct {
|
||||
// Entity is read from the "entity_name" key.
Entity string `json:"entity_name"`
|
||||
// Archived is read from the "archived" key. getCrashLs counts a crash as
// new when this string is empty, i.e. when the key is absent or blank.
Archived string `json:"archived"`
|
||||
}
|
||||
|
||||
// getCrashLs runs the 'crash ls' command and parses its results
|
||||
func (c *CrashesCollector) getCrashLs() (map[crashEntry]int, error) {
|
||||
crashes := make(map[crashEntry]int)
|
||||
|
@ -78,7 +78,7 @@ func (c *CrashesCollector) getCrashLs() (map[crashEntry]int, error) {
|
|||
// to process in an outage storm.
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "crash ls",
|
||||
"format": "plain",
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
return crashes, err
|
||||
|
@ -89,15 +89,13 @@ func (c *CrashesCollector) getCrashLs() (map[crashEntry]int, error) {
|
|||
return crashes, err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewBuffer(buf))
|
||||
for scanner.Scan() {
|
||||
matched := crashLsLineRegex.FindStringSubmatch(scanner.Text())
|
||||
if len(matched) == 3 {
|
||||
crashes[crashEntry{matched[1], matched[2] == "*"}]++
|
||||
} else if len(matched) == 2 {
|
||||
// Just in case the line-end spaces were stripped
|
||||
crashes[crashEntry{matched[1], false}]++
|
||||
}
|
||||
var crashData []cephCrashLs
|
||||
if err = json.Unmarshal(buf, &crashData); err != nil {
|
||||
return crashes, err
|
||||
}
|
||||
|
||||
for _, crash := range crashData {
|
||||
crashes[crashEntry{crash.Entity, len(crash.Archived) == 0}]++
|
||||
}
|
||||
|
||||
return crashes, nil
|
||||
|
|
|
@ -36,64 +36,139 @@ func TestCrashesCollector(t *testing.T) {
|
|||
reMatch []*regexp.Regexp
|
||||
}{
|
||||
{
|
||||
// Example with the full output; further examples will be simpler
|
||||
name: "single new crash",
|
||||
input: `
|
||||
ID ENTITY NEW
|
||||
2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9 osd.0 *
|
||||
`,
|
||||
[
|
||||
{
|
||||
"os_version_id": "7",
|
||||
"assert_condition": "p != obs_call_gate.end()",
|
||||
"utsname_release": "5.10.53-138-generic",
|
||||
"os_name": "CentOS Linux",
|
||||
"entity_name": "client.admin",
|
||||
"assert_file": "/ceph/src/common/config_proxy.h",
|
||||
"timestamp": "2022-01-25 21:03:38.371403Z",
|
||||
"process_name": "rbd-nbd",
|
||||
"utsname_machine": "x86_64",
|
||||
"utsname_sysname": "Linux",
|
||||
"os_version": "7 (Core)",
|
||||
"os_id": "centos",
|
||||
"assert_thread_name": "rbd-nbd",
|
||||
"utsname_version": "#4745ab954 SMP Fri Oct 22 23:05:54 UTC 2021",
|
||||
"backtrace": [
|
||||
"(()+0xe54d4) [0x5561b4a744d4]",
|
||||
"(()+0xf630) [0x7f18aac9f630]",
|
||||
"(gsignal()+0x37) [0x7f18a9256387]",
|
||||
"(abort()+0x148) [0x7f18a9257a78]",
|
||||
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x199) [0x7f18ac7dce46]",
|
||||
"(()+0x25cfbf) [0x7f18ac7dcfbf]",
|
||||
"(ConfigProxy::call_gate_enter(ceph::md_config_obs_impl<ConfigProxy>*)+0x79) [0x5561b4a6cc67]",
|
||||
"(ConfigProxy::map_observer_changes(ceph::md_config_obs_impl<ConfigProxy>*, std::string const&, std::map<ceph::md_config_obs_impl<ConfigProxy>*, std::set<std::string, std::less<std::string>, std::allocator<std::string> >, std::less<ceph::md_config_obs_impl<ConfigProxy>*>, std::allocator<std::pair<ceph::md_config_obs_impl<ConfigProxy>* const, std::set<std::string, std::less<std::string>, std::allocator<std::string> > > > >*)+0x120) [0x5561b4a6d0a2]",
|
||||
"(ConfigProxy::_gather_changes(std::set<std::string, std::less<std::string>, std::allocator<std::string> >&, std::map<ceph::md_config_obs_impl<ConfigProxy>*, std::set<std::string, std::less<std::string>, std::allocator<std::string> >, std::less<ceph::md_config_obs_impl<ConfigProxy>*>, std::allocator<std::pair<ceph::md_config_obs_impl<ConfigProxy>* const, std::set<std::string, std::less<std::string>, std::allocator<std::string> > > > >*, std::ostream*)::{lambda(ceph::md_config_obs_impl<ConfigProxy>*, std::string const&)#1}::operator()(ceph::md_config_obs_impl<ConfigProxy>*, std::string const&) const+0x33) [0x5561b4a6d651]",
|
||||
"(std::_Function_handler<void (ceph::md_config_obs_impl<ConfigProxy>*, std::string const&), ConfigProxy::_gather_changes(std::set<std::string, std::less<std::string>, std::allocator<std::string> >&, std::map<ceph::md_config_obs_impl<ConfigProxy>*, std::set<std::string, std::less<std::string>, std::allocator<std::string> >, std::less<ceph::md_config_obs_impl<ConfigProxy>*>, std::allocator<std::pair<ceph::md_config_obs_impl<ConfigProxy>* const, std::set<std::string, std::less<std::string>, std::allocator<std::string> > > > >*, std::ostream*)::{lambda(ceph::md_config_obs_impl<ConfigProxy>*, std::string const&)#1}>::_M_invoke(std::_Any_data const&, ceph::md_config_obs_impl<ConfigProxy>*&&, std::string const&)+0x52) [0x5561b4a6f11e]",
|
||||
"(std::function<void (ceph::md_config_obs_impl<ConfigProxy>*, std::string const&)>::operator()(ceph::md_config_obs_impl<ConfigProxy>*, std::string const&) const+0x61) [0x5561b4a6f05f]",
|
||||
"(void ObserverMgr<ceph::md_config_obs_impl<ConfigProxy> >::for_each_change<ConfigProxy>(std::set<std::string, std::less<std::string>, std::allocator<std::string> > const&, ConfigProxy&, std::function<void (ceph::md_config_obs_impl<ConfigProxy>*, std::string const&)>, std::ostream*)+0x1cb) [0x5561b4a6e343]",
|
||||
"(ConfigProxy::_gather_changes(std::set<std::string, std::less<std::string>, std::allocator<std::string> >&, std::map<ceph::md_config_obs_impl<ConfigProxy>*, std::set<std::string, std::less<std::string>, std::allocator<std::string> >, std::less<ceph::md_config_obs_impl<ConfigProxy>*>, std::allocator<std::pair<ceph::md_config_obs_impl<ConfigProxy>* const, std::set<std::string, std::less<std::string>, std::allocator<std::string> > > > >*, std::ostream*)+0x76) [0x5561b4a6d6ca]",
|
||||
"(ConfigProxy::apply_changes(std::ostream*)+0x7c) [0x5561b4a6d5aa]",
|
||||
"(global_init(std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair<std::string const, std::string> > > const*, std::vector<char const*, std::allocator<char const*> >&, unsigned int, code_environment_t, int, char const*, bool)+0x1022) [0x5561b4a6a806]",
|
||||
"(()+0x9380c) [0x5561b4a2280c]",
|
||||
"(()+0x9618a) [0x5561b4a2518a]",
|
||||
"(main()+0x20) [0x5561b4a252e4]",
|
||||
"(__libc_start_main()+0xf5) [0x7f18a9242555]",
|
||||
"(()+0x907f9) [0x5561b4a1f7f9]"
|
||||
],
|
||||
"utsname_hostname": "test-ceph-server.company.example",
|
||||
"assert_msg": "/ceph/src/common/config_proxy.h: In function 'void ConfigProxy::call_gate_enter(ConfigProxy::md_config_obs_t*)' thread 7f18b63dfa00 time 2022-01-25 21:03:38.368357\n/ceph/src/common/config_proxy.h: 65: FAILED ceph_assert(p != obs_call_gate.end())\n",
|
||||
"crash_id": "2022-01-25_21:03:38.371403Z_f9df5b64-32ef-4073-8b37-d1c5a1b3dcb8",
|
||||
"assert_line": 65,
|
||||
"ceph_version": "14.2.18",
|
||||
"assert_func": "void ConfigProxy::call_gate_enter(ConfigProxy::md_config_obs_t*)"
|
||||
}
|
||||
]`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="osd.0",status="new"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="client.admin",status="new"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "single archived crash",
|
||||
input: `
|
||||
ID ENTITY NEW
|
||||
2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9 osd.0
|
||||
[
|
||||
{
|
||||
"entity_name": "client.admin",
|
||||
"timestamp": "2022-01-25 21:02:46.687015Z",
|
||||
"archived": "2022-06-14 19:44:40.356826",
|
||||
"crash_id": "2022-01-25_21:02:46.687015Z_d6513591-c16b-472f-8d40-5a143b28837d"
|
||||
}
|
||||
]
|
||||
`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="osd.0",status="archived"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="client.admin",status="archived"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two new crashes same entity",
|
||||
input: `
|
||||
ID ENTITY NEW
|
||||
2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9 osd.0 *
|
||||
2022-02-03_04:05:45.419226Z_11c639af-5eb2-4a29-91aa-20120218891a osd.0 *
|
||||
`,
|
||||
[
|
||||
{
|
||||
"entity_name": "osd.0",
|
||||
"timestamp": "2022-02-01 21:02:46.687015Z",
|
||||
"crash_id": "2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9"
|
||||
},
|
||||
{
|
||||
"entity_name": "osd.0",
|
||||
"timestamp": "2022-02-03 04:05:45.419226Z",
|
||||
"crash_id": "2022-02-03_04:05:45.419226Z_11c639af-5eb2-4a29-91aa-20120218891a"
|
||||
}
|
||||
]`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="osd.0",status="new"} 2`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="osd.0",status="new"} 2`),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mix of crashes same entity",
|
||||
input: `
|
||||
ID ENTITY NEW
|
||||
2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9 osd.0
|
||||
2022-02-03_04:05:45.419226Z_11c639af-5eb2-4a29-91aa-20120218891a osd.0 *
|
||||
`,
|
||||
[
|
||||
{
|
||||
"entity_name": "osd.0",
|
||||
"timestamp": "2022-02-01 21:02:46.687015Z",
|
||||
"crash_id": "2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9"
|
||||
},
|
||||
{
|
||||
"entity_name": "osd.0",
|
||||
"timestamp": "2022-02-03 04:05:45.419226Z",
|
||||
"archived": "2022-06-14 19:44:40.356826",
|
||||
"crash_id": "2022-02-03_04:05:45.419226Z_11c639af-5eb2-4a29-91aa-20120218891a"
|
||||
}
|
||||
]`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="osd.0",status="new"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="osd.0",status="archived"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="osd.0",status="new"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="osd.0",status="archived"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mix of crashes different entities",
|
||||
input: `
|
||||
ID ENTITY NEW
|
||||
2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9 mgr.mgr-node-01 *
|
||||
2022-02-03_04:05:45.419226Z_11c639af-5eb2-4a29-91aa-20120218891a client.admin *
|
||||
`,
|
||||
[
|
||||
{
|
||||
"entity_name": "mgr.mgr-node-01",
|
||||
"timestamp": "2022-02-01 21:02:46.687015Z",
|
||||
"crash_id": "2022-02-01_21:02:46.687015Z_0de8b741-b323-4f63-828a-e460294e28b9"
|
||||
},
|
||||
{
|
||||
"entity_name": "client.admin",
|
||||
"timestamp": "2022-02-03 04:05:45.419226Z",
|
||||
"crash_id": "2022-02-03_04:05:45.419226Z_11c639af-5eb2-4a29-91aa-20120218891a"
|
||||
}
|
||||
]`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="mgr.mgr-node-01",status="new"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",daemon="client.admin",status="new"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="mgr.mgr-node-01",status="new"} 1`),
|
||||
regexp.MustCompile(`crash_reports{cluster="ceph",entity="client.admin",status="new"} 1`),
|
||||
},
|
||||
},
|
||||
{
|
||||
// At least the code shouldn't panic
|
||||
name: "no crashes",
|
||||
input: ``,
|
||||
input: `[]`,
|
||||
reMatch: []*regexp.Regexp{},
|
||||
},
|
||||
} {
|
||||
|
|
Loading…
Reference in New Issue