mirror of
https://github.com/ceph/ceph
synced 2024-12-18 01:16:55 +00:00
Merge pull request #16487 from liewegas/wip-mgr-module-health
mgr: mgr_module interface to report health alerts Reviewed-by: John Spray <john.spray@redhat.com>
This commit is contained in:
commit
2c4ad54f80
@ -1169,6 +1169,8 @@ void DaemonServer::send_report()
|
||||
}
|
||||
|
||||
auto m = new MMonMgrReport();
|
||||
py_modules.get_health_checks(&m->health_checks);
|
||||
|
||||
cluster_state.with_pgmap([&](const PGMap& pg_map) {
|
||||
cluster_state.update_delta_stats();
|
||||
|
||||
@ -1191,6 +1193,7 @@ void DaemonServer::send_report()
|
||||
|
||||
pg_map.get_health_checks(g_ceph_context, osdmap,
|
||||
&m->health_checks);
|
||||
|
||||
dout(10) << m->health_checks.checks.size() << " health checks"
|
||||
<< dendl;
|
||||
dout(20) << "health checks:\n";
|
||||
|
@ -365,3 +365,7 @@ int MgrPyModule::handle_command(
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * Contribute this module's stored health checks to a caller-supplied map.
 *
 * @param checks destination map; this module's health_checks member is
 *               merged into it via health_check_map_t::merge()
 */
void MgrPyModule::get_health_checks(health_check_map_t *checks)
{
  health_check_map_t& aggregate = *checks;
  aggregate.merge(health_checks);
}
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
#include "common/cmdparse.h"
|
||||
#include "common/LogEntry.h"
|
||||
#include "common/Mutex.h"
|
||||
#include "mon/health_check.h"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@ -47,6 +49,8 @@ private:
|
||||
PyThreadState *pMainThreadState;
|
||||
PyThreadState *pMyThreadState = nullptr;
|
||||
|
||||
health_check_map_t health_checks;
|
||||
|
||||
std::vector<ModuleCommand> commands;
|
||||
|
||||
int load_commands();
|
||||
@ -75,6 +79,11 @@ public:
|
||||
const cmdmap_t &cmdmap,
|
||||
std::stringstream *ds,
|
||||
std::stringstream *ss);
|
||||
|
||||
/// Replace this module's stored health checks with the contents of @p c,
/// which is moved from.
void set_health_checks(health_check_map_t&& c)
{
  health_check_map_t& stored = health_checks;
  stored = std::move(c);
}
|
||||
void get_health_checks(health_check_map_t *checks);
|
||||
};
|
||||
|
||||
std::string handle_pyerror();
|
||||
|
@ -800,3 +800,21 @@ void PyModules::list_modules(std::set<std::string> *modules)
|
||||
{
|
||||
_list_modules(g_conf->mgr_module_path, modules);
|
||||
}
|
||||
|
||||
void PyModules::set_health_checks(const std::string& handle,
|
||||
health_check_map_t&& checks)
|
||||
{
|
||||
Mutex::Locker l(lock);
|
||||
auto p = modules.find(handle);
|
||||
if (p != modules.end()) {
|
||||
p->second->set_health_checks(std::move(checks));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collect the health checks of every loaded module into @p checks.
 *
 * Takes the PyModules lock, then asks each entry of the modules map in turn
 * to add its checks to the caller-supplied map.
 *
 * @param checks destination map passed through to each module
 */
void PyModules::get_health_checks(health_check_map_t *checks)
{
  Mutex::Locker l(lock);
  for (auto it = modules.begin(); it != modules.end(); ++it) {
    it->second->get_health_checks(checks);
  }
}
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "ClusterState.h"
|
||||
|
||||
class ServeThread;
|
||||
class health_check_map_t;
|
||||
|
||||
class PyModules
|
||||
{
|
||||
@ -115,6 +116,10 @@ public:
|
||||
void set_config(const std::string &handle,
|
||||
const std::string &key, const std::string &val);
|
||||
|
||||
void set_health_checks(const std::string& handle,
|
||||
health_check_map_t&& checks);
|
||||
void get_health_checks(health_check_map_t *checks);
|
||||
|
||||
void log(const std::string &handle,
|
||||
int level, const std::string &record);
|
||||
|
||||
|
@ -182,6 +182,107 @@ ceph_send_command(PyObject *self, PyObject *args)
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/**
 * Python binding for ceph_state.set_health_checks(handle, checks).
 *
 * Converts a Python dict of the form
 *   { check_name: { "severity": str, "summary": str, "detail": [str, ...] } }
 * into a health_check_map_t and passes it to
 * global_handle->set_health_checks() for the module named by handle.
 *
 * Malformed entries are logged with derr and skipped; they are not reported
 * to the Python caller as exceptions. A severity string other than
 * "warning" or "error" leaves the check at HEALTH_OK.
 *
 * @param self unused
 * @param args Python argument tuple: (str handle, dict checks)
 * @return None on success (including when checks is not a dict); NULL with a
 *         Python exception set if the arguments cannot be parsed or a dict's
 *         item list cannot be built
 */
static PyObject*
ceph_set_health_checks(PyObject *self, PyObject *args)
{
  char *handle = nullptr;
  PyObject *checks = NULL;
  if (!PyArg_ParseTuple(args, "sO:ceph_set_health_checks", &handle, &checks)) {
    return NULL;
  }
  if (!PyDict_Check(checks)) {
    derr << __func__ << " arg not a dict" << dendl;
    Py_RETURN_NONE;
  }

  // PyDict_Items() returns a NEW reference; it must be released on every
  // path out of this function.
  PyObject *checksls = PyDict_Items(checks);
  if (!checksls) {
    return NULL;
  }

  health_check_map_t out_checks;
  const Py_ssize_t num_checks = PyList_Size(checksls);
  for (Py_ssize_t i = 0; i < num_checks; ++i) {
    // Borrowed reference, kept alive by checksls.
    PyObject *kv = PyList_GET_ITEM(checksls, i);
    char *check_name = nullptr;
    PyObject *check_info = nullptr;
    if (!PyArg_ParseTuple(kv, "sO:pair", &check_name, &check_info)) {
      // The entry is skipped, so the exception raised by the failed parse
      // must not be left pending when we later return None.
      PyErr_Clear();
      derr << __func__ << " dict item " << i
           << " not a size 2 tuple" << dendl;
      continue;
    }
    if (!PyDict_Check(check_info)) {
      derr << __func__ << " item " << i << " " << check_name
           << " value not a dict" << dendl;
      continue;
    }

    health_status_t severity = HEALTH_OK;
    string summary;
    list<string> detail;

    // New reference; released right after the loop over its items.
    PyObject *infols = PyDict_Items(check_info);
    if (!infols) {
      Py_DECREF(checksls);
      return NULL;
    }
    const Py_ssize_t num_info = PyList_Size(infols);
    for (Py_ssize_t j = 0; j < num_info; ++j) {
      PyObject *pair = PyList_GET_ITEM(infols, j);
      if (!PyTuple_Check(pair)) {
        derr << __func__ << " item " << i << " pair " << j
             << " not a tuple" << dendl;
        continue;
      }
      char *k = nullptr;
      PyObject *v = nullptr;
      if (!PyArg_ParseTuple(pair, "sO:pair", &k, &v)) {
        PyErr_Clear();  // skipped entry: do not leave the error pending
        derr << __func__ << " item " << i << " pair " << j
             << " not a size 2 tuple" << dendl;
        continue;
      }
      string ks(k);
      if (ks == "severity") {
        if (!PyString_Check(v)) {
          derr << __func__ << " check " << check_name
               << " severity value not string" << dendl;
          continue;
        }
        string vs(PyString_AsString(v));
        if (vs == "warning") {
          severity = HEALTH_WARN;
        } else if (vs == "error") {
          severity = HEALTH_ERR;
        }
      } else if (ks == "summary") {
        if (!PyString_Check(v)) {
          derr << __func__ << " check " << check_name
               << " summary value not string" << dendl;
          continue;
        }
        summary = PyString_AsString(v);
      } else if (ks == "detail") {
        if (!PyList_Check(v)) {
          derr << __func__ << " check " << check_name
               << " detail value not list" << dendl;
          continue;
        }
        const Py_ssize_t num_detail = PyList_Size(v);
        for (Py_ssize_t n = 0; n < num_detail; ++n) {
          PyObject *di = PyList_GET_ITEM(v, n);
          if (!PyString_Check(di)) {
            derr << __func__ << " check " << check_name
                 << " detail item " << n << " not a string" << dendl;
            continue;
          }
          detail.push_back(PyString_AsString(di));
        }
      } else {
        derr << __func__ << " check " << check_name
             << " unexpected key " << k << dendl;
      }
    }
    // k and v pointed into infols; nothing below this line uses them.
    Py_DECREF(infols);

    // check_name points into checksls, which is still alive here.
    auto& d = out_checks.add(check_name, severity, summary);
    d.detail.swap(detail);
  }
  Py_DECREF(checksls);

  JSONFormatter jf(true);
  dout(10) << "module " << handle << " health checks:\n";
  out_checks.dump(&jf);
  jf.flush(*_dout);
  *_dout << dendl;

  global_handle->set_health_checks(handle, std::move(out_checks));

  Py_RETURN_NONE;
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
ceph_state_get(PyObject *self, PyObject *args)
|
||||
@ -359,6 +460,8 @@ PyMethodDef CephStateMethods[] = {
|
||||
"Get a service's status"},
|
||||
{"send_command", ceph_send_command, METH_VARARGS,
|
||||
"Send a mon command"},
|
||||
{"set_health_checks", ceph_set_health_checks, METH_VARARGS,
|
||||
"Set health checks for this module"},
|
||||
{"get_mgr_id", ceph_get_mgr_id, METH_NOARGS,
|
||||
"Get the mgr id"},
|
||||
{"get_config", ceph_config_get, METH_VARARGS,
|
||||
|
@ -2562,8 +2562,6 @@ void PGMap::get_health_checks(
|
||||
const unsigned max = cct->_conf->mon_health_max_detail;
|
||||
const auto& pools = osdmap.get_pools();
|
||||
|
||||
checks->clear();
|
||||
|
||||
typedef enum pg_consequence_t {
|
||||
UNAVAILABLE = 1, // Client IO to the pool may block
|
||||
DEGRADED = 2, // Fewer than the requested number of replicas are present
|
||||
|
@ -169,6 +169,30 @@ class MgrModule(object):
|
||||
"""
|
||||
ceph_state.send_command(self._handle, *args, **kwargs)
|
||||
|
||||
def set_health_checks(self, checks):
    """
    Publish this module's health checks.

    The supplied mapping becomes the module's current set of health
    checks. It maps each check name to a dict describing that check,
    for example:

       {
         'CHECK_FOO': {
           'severity': 'warning',           # or 'error'
           'summary': 'summary string',
           'detail': [ 'list', 'of', 'detail', 'strings' ],
          },
         'CHECK_BAR': {
           'severity': 'error',
           'summary': 'bars are bad',
           'detail': [ 'too hard' ],
         },
       }

    :param dict checks: dict of health check dicts
    """
    module_handle = self._handle
    ceph_state.set_health_checks(module_handle, checks)
|
||||
|
||||
def handle_command(self, cmd):
|
||||
"""
|
||||
Called by ceph-mgr to request the plugin to handle one
|
||||
|
Loading…
Reference in New Issue
Block a user