Merge pull request #16487 from liewegas/wip-mgr-module-health

mgr: mgr_module interface to report health alerts

Reviewed-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2017-07-28 13:24:41 +01:00 committed by GitHub
commit 2c4ad54f80
8 changed files with 166 additions and 2 deletions

View File

@ -1169,6 +1169,8 @@ void DaemonServer::send_report()
}
auto m = new MMonMgrReport();
py_modules.get_health_checks(&m->health_checks);
cluster_state.with_pgmap([&](const PGMap& pg_map) {
cluster_state.update_delta_stats();
@ -1191,6 +1193,7 @@ void DaemonServer::send_report()
pg_map.get_health_checks(g_ceph_context, osdmap,
&m->health_checks);
dout(10) << m->health_checks.checks.size() << " health checks"
<< dendl;
dout(20) << "health checks:\n";

View File

@ -365,3 +365,7 @@ int MgrPyModule::handle_command(
return r;
}
/**
 * Contribute this module's health checks to an aggregate map.
 *
 * Merges the module's stored `health_checks` member into the
 * caller-supplied map via health_check_map_t::merge().
 *
 * @param checks destination map; dereferenced unconditionally, so it
 *               must not be null
 */
void MgrPyModule::get_health_checks(health_check_map_t *checks)
{
  checks->merge(health_checks);
}

View File

@ -21,6 +21,8 @@
#include "common/cmdparse.h"
#include "common/LogEntry.h"
#include "common/Mutex.h"
#include "mon/health_check.h"
#include <vector>
#include <string>
@ -47,6 +49,8 @@ private:
PyThreadState *pMainThreadState;
PyThreadState *pMyThreadState = nullptr;
health_check_map_t health_checks;
std::vector<ModuleCommand> commands;
int load_commands();
@ -75,6 +79,11 @@ public:
const cmdmap_t &cmdmap,
std::stringstream *ds,
std::stringstream *ss);
/// Replace this module's stored health checks with `c`.
/// The previous contents are overwritten (move-assignment), and `c` is
/// left in a moved-from state.
void set_health_checks(health_check_map_t&& c) {
  health_checks = std::move(c);
}
/// Merge this module's stored health checks into `*checks`.
void get_health_checks(health_check_map_t *checks);
};
std::string handle_pyerror();

View File

@ -800,3 +800,21 @@ void PyModules::list_modules(std::set<std::string> *modules)
{
_list_modules(g_conf->mgr_module_path, modules);
}
/**
 * Store a new set of health checks for one loaded module.
 *
 * Takes `lock`, looks the module up by its handle and, if it is found,
 * moves `checks` into it.  An unknown handle is ignored and `checks`
 * is left untouched.
 *
 * @param handle key used to look the module up in `modules`
 * @param checks health checks to hand over to the module
 */
void PyModules::set_health_checks(const std::string& handle,
                                  health_check_map_t&& checks)
{
  Mutex::Locker guard(lock);

  auto found = modules.find(handle);
  if (found == modules.end()) {
    // No module registered under this handle: nothing to update.
    return;
  }

  found->second->set_health_checks(std::move(checks));
}
/**
 * Collect the health checks of every loaded module.
 *
 * Takes `lock` and asks each entry in `modules` in turn to add its
 * health checks to `*checks`.
 *
 * @param checks destination map, passed through to every module
 */
void PyModules::get_health_checks(health_check_map_t *checks)
{
  Mutex::Locker guard(lock);

  for (auto it = modules.begin(); it != modules.end(); ++it) {
    it->second->get_health_checks(checks);
  }
}

View File

@ -30,6 +30,7 @@
#include "ClusterState.h"
class ServeThread;
class health_check_map_t;
class PyModules
{
@ -115,6 +116,10 @@ public:
void set_config(const std::string &handle,
const std::string &key, const std::string &val);
/// Hand a new set of health checks to the module registered under
/// `handle`; an unknown handle is ignored.
void set_health_checks(const std::string& handle,
health_check_map_t&& checks);
/// Gather the health checks of every loaded module into `*checks`.
void get_health_checks(health_check_map_t *checks);
void log(const std::string &handle,
int level, const std::string &record);

View File

@ -182,6 +182,107 @@ ceph_send_command(PyObject *self, PyObject *args)
Py_RETURN_NONE;
}
/**
 * Python binding for ceph_state.set_health_checks(handle, checks).
 *
 * Converts a Python dict of the form
 *
 *   { 'CHECK_NAME': { 'severity': 'warning' | 'error',
 *                     'summary':  str,
 *                     'detail':   [ str, ... ] },
 *     ... }
 *
 * into a health_check_map_t and passes it to
 * global_handle->set_health_checks() for the module named by `handle`.
 *
 * Error handling is best-effort:
 *  - a non-dict `checks` argument is logged and ignored (None returned);
 *  - a top-level item whose key is not a string or whose value is not a
 *    dict is logged and skipped;
 *  - a malformed or unknown field inside a check is logged and skipped,
 *    and the check is still added with whatever fields were valid
 *    (severity defaults to HEALTH_OK, summary/detail to empty).
 *
 * @param self unused
 * @param args Python argument tuple: (str handle, dict checks)
 * @return None on success; NULL with a Python exception set if the
 *         arguments cannot be parsed or PyDict_Items() fails
 */
static PyObject*
ceph_set_health_checks(PyObject *self, PyObject *args)
{
  char *handle = nullptr;
  PyObject *checks = NULL;
  if (!PyArg_ParseTuple(args, "sO:ceph_set_health_checks", &handle, &checks)) {
    return NULL;
  }
  if (!PyDict_Check(checks)) {
    derr << __func__ << " arg not a dict" << dendl;
    Py_RETURN_NONE;
  }

  // PyDict_Items() returns a NEW reference (or NULL on failure); we own
  // the list and must release it before returning.
  PyObject *checksls = PyDict_Items(checks);
  if (!checksls) {
    return NULL;
  }

  health_check_map_t out_checks;
  const Py_ssize_t num_checks = PyList_Size(checksls);
  for (Py_ssize_t i = 0; i < num_checks; ++i) {
    PyObject *kv = PyList_GET_ITEM(checksls, i);  // borrowed
    char *check_name = nullptr;
    PyObject *check_info = nullptr;
    if (!PyArg_ParseTuple(kv, "sO:pair", &check_name, &check_info)) {
      // We skip the item instead of raising, so the exception set by
      // PyArg_ParseTuple must not be left pending when we return None.
      PyErr_Clear();
      derr << __func__ << " dict item " << i
           << " not a size 2 tuple" << dendl;
      continue;
    }
    if (!PyDict_Check(check_info)) {
      derr << __func__ << " item " << i << " " << check_name
           << " value not a dict" << dendl;
      continue;
    }

    health_status_t severity = HEALTH_OK;
    std::string summary;
    std::list<std::string> detail;

    PyObject *infols = PyDict_Items(check_info);  // new reference
    if (!infols) {
      Py_DECREF(checksls);
      return NULL;
    }
    const Py_ssize_t num_fields = PyList_Size(infols);
    for (Py_ssize_t j = 0; j < num_fields; ++j) {
      PyObject *pair = PyList_GET_ITEM(infols, j);  // borrowed
      if (!PyTuple_Check(pair)) {
        derr << __func__ << " item " << i << " pair " << j
             << " not a tuple" << dendl;
        continue;
      }
      char *k = nullptr;
      PyObject *v = nullptr;
      if (!PyArg_ParseTuple(pair, "sO:pair", &k, &v)) {
        PyErr_Clear();  // logged and skipped, see above
        derr << __func__ << " item " << i << " pair " << j
             << " not a size 2 tuple" << dendl;
        continue;
      }
      const std::string ks(k);
      if (ks == "severity") {
        if (!PyString_Check(v)) {
          derr << __func__ << " check " << check_name
               << " severity value not string" << dendl;
          continue;
        }
        const std::string vs(PyString_AsString(v));
        if (vs == "warning") {
          severity = HEALTH_WARN;
        } else if (vs == "error") {
          severity = HEALTH_ERR;
        } else {
          // Severity is left unchanged; report it rather than
          // ignoring the value silently.
          derr << __func__ << " check " << check_name
               << " unrecognized severity " << vs << dendl;
        }
      } else if (ks == "summary") {
        if (!PyString_Check(v)) {
          derr << __func__ << " check " << check_name
               << " summary value not string" << dendl;
          continue;
        }
        summary = PyString_AsString(v);
      } else if (ks == "detail") {
        if (!PyList_Check(v)) {
          derr << __func__ << " check " << check_name
               << " detail value not list" << dendl;
          continue;
        }
        const Py_ssize_t num_details = PyList_Size(v);
        for (Py_ssize_t n = 0; n < num_details; ++n) {
          PyObject *di = PyList_GET_ITEM(v, n);  // borrowed
          if (!PyString_Check(di)) {
            derr << __func__ << " check " << check_name
                 << " detail item " << n << " not a string" << dendl;
            continue;
          }
          detail.push_back(PyString_AsString(di));
        }
      } else {
        derr << __func__ << " check " << check_name
             << " unexpected key " << k << dendl;
      }
    }
    Py_DECREF(infols);

    auto& d = out_checks.add(check_name, severity, summary);
    d.detail.swap(detail);
  }
  Py_DECREF(checksls);

  JSONFormatter jf(true);
  dout(10) << "module " << handle << " health checks:\n";
  out_checks.dump(&jf);
  jf.flush(*_dout);
  *_dout << dendl;

  global_handle->set_health_checks(handle, std::move(out_checks));

  Py_RETURN_NONE;
}
static PyObject*
ceph_state_get(PyObject *self, PyObject *args)
@ -359,6 +460,8 @@ PyMethodDef CephStateMethods[] = {
"Get a service's status"},
{"send_command", ceph_send_command, METH_VARARGS,
"Send a mon command"},
{"set_health_checks", ceph_set_health_checks, METH_VARARGS,
"Set health checks for this module"},
{"get_mgr_id", ceph_get_mgr_id, METH_NOARGS,
"Get the mgr id"},
{"get_config", ceph_config_get, METH_VARARGS,

View File

@ -2562,8 +2562,6 @@ void PGMap::get_health_checks(
const unsigned max = cct->_conf->mon_health_max_detail;
const auto& pools = osdmap.get_pools();
checks->clear();
typedef enum pg_consequence_t {
UNAVAILABLE = 1, // Client IO to the pool may block
DEGRADED = 2, // Fewer than the requested number of replicas are present

View File

@ -169,6 +169,30 @@ class MgrModule(object):
"""
ceph_state.send_command(self._handle, *args, **kwargs)
def set_health_checks(self, checks):
    """
    Set module's health checks

    Set the module's current map of health checks.  Argument is a
    dict of check names to info, in this form:

       {
         'CHECK_FOO': {
           'severity': 'warning',           # or 'error'
           'summary': 'summary string',
           'detail': [ 'list', 'of', 'detail', 'strings' ],
          },
         'CHECK_BAR': {
           'severity': 'error',
           'summary': 'bars are bad',
           'detail': [ 'too hard' ],
         },
       }

    :param checks: dict of health check dicts
    """
    ceph_state.set_health_checks(self._handle, checks)
def handle_command(self, cmd):
"""
Called by ceph-mgr to request the plugin to handle one