mirror of
https://github.com/ceph/ceph
synced 2025-01-01 08:32:24 +00:00
mgr: monitor-controlled always on modules
the original version of the always-on mgr modules feature had an issue such that if a user-enabled module was included in the always-on list of modules then after an upgrade the module would remain in the list of enabled modules (because it was still in the manager map). this results in awkward ux since the modules cannot actually be disabled. this patch addresses the issue by altering MgrMap to contain the always on modules, and centralizes control of this list in the monitor. the list of always on modules is versioned at compile time, and the monitor will update the manager map when this verison is bumped. manager daemons enable the union of user-enabled modules and the list of always on modules. this is important so that a user-enabled module is not disabled if it goes through a cycle of being in the always on list, and then being removed through an upgrade sequence. communicating the always on modules to the daemon enables the daemon to provide the best error reporting when an always on module encounters an issue (i.e. complain loudly). Signed-off-by: Noah Watkins <nwatkins@redhat.com>
This commit is contained in:
parent
8ef9b7e2a5
commit
18f253aa3f
@ -188,10 +188,6 @@ void MgrStandby::send_beacon()
|
||||
// which we will transmit to the monitor.
|
||||
std::vector<MgrMap::ModuleInfo> module_info;
|
||||
for (const auto &module : modules) {
|
||||
// do not announce always_on modules to the monitor
|
||||
if (module->is_always_on()) {
|
||||
continue;
|
||||
}
|
||||
MgrMap::ModuleInfo info;
|
||||
info.name = module->get_name();
|
||||
info.error_string = module->get_error_string();
|
||||
|
@ -57,13 +57,15 @@ class PyModule
|
||||
mutable Mutex lock{"PyModule::lock"};
|
||||
private:
|
||||
const std::string module_name;
|
||||
const bool always_on;
|
||||
std::string get_site_packages();
|
||||
int load_subclass_of(const char* class_name, PyObject** py_class);
|
||||
|
||||
// Did the MgrMap identify this module as one that should run?
|
||||
bool enabled = false;
|
||||
|
||||
// Did the MgrMap flag this module as always on?
|
||||
bool always_on = false;
|
||||
|
||||
// Did we successfully import this python module and look up symbols?
|
||||
// (i.e. is it possible to instantiate a MgrModule subclass instance?)
|
||||
bool loaded = false;
|
||||
@ -97,8 +99,8 @@ public:
|
||||
PyObject *pClass = nullptr;
|
||||
PyObject *pStandbyClass = nullptr;
|
||||
|
||||
explicit PyModule(const std::string &module_name_, bool always_on)
|
||||
: module_name(module_name_), always_on(always_on)
|
||||
explicit PyModule(const std::string &module_name_)
|
||||
: module_name(module_name_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -120,6 +122,10 @@ public:
|
||||
enabled = enabled_;
|
||||
}
|
||||
|
||||
void set_always_on(const bool always_on_) {
|
||||
always_on = always_on_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extend `out` with the contents of `this->commands`
|
||||
*/
|
||||
|
@ -71,12 +71,9 @@ void PyModuleRegistry::init()
|
||||
for (const auto& module_name : module_names) {
|
||||
dout(1) << "Loading python module '" << module_name << "'" << dendl;
|
||||
|
||||
const bool always_on = always_on_modules.find(module_name) !=
|
||||
always_on_modules.end();
|
||||
|
||||
// Everything starts disabled, set enabled flag on module
|
||||
// when we see first MgrMap
|
||||
auto mod = std::make_shared<PyModule>(module_name, always_on);
|
||||
auto mod = std::make_shared<PyModule>(module_name);
|
||||
int r = mod->load(pMainThreadState);
|
||||
if (r != 0) {
|
||||
// Don't use handle_pyerror() here; we don't have the GIL
|
||||
@ -111,11 +108,14 @@ bool PyModuleRegistry::handle_mgr_map(const MgrMap &mgr_map_)
|
||||
for (const auto &[module_name, module] : modules) {
|
||||
const bool enabled = (mgr_map.modules.count(module_name) > 0);
|
||||
module->set_enabled(enabled);
|
||||
const bool always_on = (mgr_map.get_always_on_modules().count(module_name) > 0);
|
||||
module->set_always_on(always_on);
|
||||
}
|
||||
|
||||
return false;
|
||||
} else {
|
||||
bool modules_changed = mgr_map_.modules != mgr_map.modules;
|
||||
bool modules_changed = mgr_map_.modules != mgr_map.modules ||
|
||||
mgr_map_.always_on_modules != mgr_map.always_on_modules;
|
||||
mgr_map = mgr_map_;
|
||||
|
||||
if (standby_modules != nullptr) {
|
||||
@ -374,7 +374,7 @@ void PyModuleRegistry::get_health_checks(health_check_map_t *checks)
|
||||
}
|
||||
|
||||
// report failed always_on modules as health errors
|
||||
for (const auto& name : always_on_modules) {
|
||||
for (const auto& name : mgr_map.get_always_on_modules()) {
|
||||
if (!active_modules->module_exists(name)) {
|
||||
if (failed_modules.find(name) == failed_modules.end() &&
|
||||
dependency_modules.find(name) == dependency_modules.end()) {
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <memory>
|
||||
|
||||
#include "common/LogClient.h"
|
||||
|
@ -3,24 +3,6 @@
|
||||
|
||||
#include "mgr_commands.h"
|
||||
|
||||
// an always_on module behaves as if it provides built-in functionality. it's
|
||||
// always enabled, and always_on modules do not appear in the list of modules
|
||||
// that can be enabled or disabled by the user. a health warning error is
|
||||
// reported if a module in this set is cannot be loaded and started.
|
||||
//
|
||||
// NOTE: if a module is added to this set AND that module _may_ have been
|
||||
// enabled in some cluster (e.g. an existing module is being upgraded to
|
||||
// always-on status), then additional logic is needed in the monitor to trim the
|
||||
// enabled set (or upgrade script to run `disable <module`). At the time of
|
||||
// writing, the proposed always-on modules are being added within the same
|
||||
// release.
|
||||
const std::set<std::string> always_on_modules = {
|
||||
"crash",
|
||||
"status",
|
||||
"balancer",
|
||||
"devicehealth",
|
||||
};
|
||||
|
||||
/* The set of statically defined (C++-handled) commands. This
|
||||
* does not include the Python-defined commands, which are loaded
|
||||
* in PyModules */
|
||||
|
@ -4,9 +4,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "mon/MonCommand.h"
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
extern const std::set<std::string> always_on_modules;
|
||||
extern const std::vector<MonCommand> mgr_commands;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "msg/msg_types.h"
|
||||
#include "common/Formatter.h"
|
||||
#include "include/encoding.h"
|
||||
#include "common/version.h"
|
||||
|
||||
|
||||
class MgrMap
|
||||
@ -145,6 +146,11 @@ public:
|
||||
// Modules which are enabled
|
||||
std::set<std::string> modules;
|
||||
|
||||
// Modules which should always be enabled. A manager daemon will enable
|
||||
// modules from the union of this set and the `modules` set above, latest
|
||||
// active version.
|
||||
std::map<uint32_t, std::set<std::string>> always_on_modules;
|
||||
|
||||
// Modules which are reported to exist
|
||||
std::vector<ModuleInfo> available_modules;
|
||||
|
||||
@ -236,6 +242,13 @@ public:
|
||||
return ls;
|
||||
}
|
||||
|
||||
std::set<std::string> get_always_on_modules() const {
|
||||
auto it = always_on_modules.find(ceph_release());
|
||||
if (it == always_on_modules.end())
|
||||
return {};
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void encode(bufferlist& bl, uint64_t features) const
|
||||
{
|
||||
if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
|
||||
@ -261,7 +274,7 @@ public:
|
||||
ENCODE_FINISH(bl);
|
||||
return;
|
||||
}
|
||||
ENCODE_START(7, 6, bl);
|
||||
ENCODE_START(8, 6, bl);
|
||||
encode(epoch, bl);
|
||||
encode(active_addrs, bl, features);
|
||||
encode(active_gid, bl);
|
||||
@ -272,6 +285,7 @@ public:
|
||||
encode(services, bl);
|
||||
encode(available_modules, bl);
|
||||
encode(active_change, bl);
|
||||
encode(always_on_modules, bl);
|
||||
ENCODE_FINISH(bl);
|
||||
return;
|
||||
}
|
||||
@ -314,6 +328,9 @@ public:
|
||||
} else {
|
||||
active_change = {};
|
||||
}
|
||||
if (struct_v >= 8) {
|
||||
decode(always_on_modules, p);
|
||||
}
|
||||
DECODE_FINISH(p);
|
||||
}
|
||||
|
||||
@ -353,6 +370,16 @@ public:
|
||||
f->dump_string(i.first.c_str(), i.second);
|
||||
}
|
||||
f->close_section();
|
||||
|
||||
f->open_object_section("always_on_modules");
|
||||
for (auto& v : always_on_modules) {
|
||||
f->open_array_section(ceph_release_name(v.first));
|
||||
for (auto& m : v.second) {
|
||||
f->dump_string("module", m);
|
||||
}
|
||||
f->close_section();
|
||||
}
|
||||
f->close_section();
|
||||
}
|
||||
|
||||
static void generate_test_instances(list<MgrMap*> &l) {
|
||||
|
@ -36,6 +36,19 @@ static ostream& _prefix(std::ostream *_dout, Monitor *mon,
|
||||
<< ").mgr e" << mgrmap.get_epoch() << " ";
|
||||
}
|
||||
|
||||
// the system treats always_on_modules as if they provide built-in functionality
|
||||
// by ensuring that they are always enabled.
|
||||
const static std::map<uint32_t, std::set<std::string>> always_on_modules = {
|
||||
{
|
||||
CEPH_RELEASE_NAUTILUS, {
|
||||
"crash",
|
||||
"status",
|
||||
"balancer",
|
||||
"devicehealth"
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Prefix for mon store of active mgr's command descriptions
|
||||
const static std::string command_descs_prefix = "mgr_command_descs";
|
||||
|
||||
@ -57,9 +70,11 @@ void MgrMonitor::create_initial()
|
||||
for (auto& m : tok) {
|
||||
pending_map.modules.insert(m);
|
||||
}
|
||||
pending_map.always_on_modules = always_on_modules;
|
||||
pending_command_descs = mgr_commands;
|
||||
dout(10) << __func__ << " initial modules " << pending_map.modules
|
||||
<< ", " << pending_command_descs.size() << " commands"
|
||||
<< ", always on modules " << pending_map.get_always_on_modules()
|
||||
<< ", " << pending_command_descs.size() << " commands"
|
||||
<< dendl;
|
||||
}
|
||||
|
||||
@ -546,6 +561,13 @@ void MgrMonitor::on_active()
|
||||
{
|
||||
if (mon->is_leader()) {
|
||||
mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map;
|
||||
|
||||
if (pending_map.always_on_modules != always_on_modules) {
|
||||
pending_map.always_on_modules = always_on_modules;
|
||||
dout(4) << "always on modules changed "
|
||||
<< pending_map.get_always_on_modules() << dendl;
|
||||
propose_pending();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -761,6 +783,8 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op)
|
||||
{
|
||||
f->open_array_section("enabled_modules");
|
||||
for (auto& p : map.modules) {
|
||||
if (map.get_always_on_modules().count(p) > 0)
|
||||
continue;
|
||||
// We only show the name for enabled modules. The any errors
|
||||
// etc will show up as a health checks.
|
||||
f->dump_string("module", p);
|
||||
@ -768,7 +792,8 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op)
|
||||
f->close_section();
|
||||
f->open_array_section("disabled_modules");
|
||||
for (auto& p : map.available_modules) {
|
||||
if (map.modules.count(p.name) == 0) {
|
||||
if (map.modules.count(p.name) == 0 &&
|
||||
map.get_always_on_modules().count(p.name) == 0) {
|
||||
// For disabled modules, we show the full info, to
|
||||
// give a hint about whether enabling it will work
|
||||
p.dump(f.get());
|
||||
@ -926,6 +951,10 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op)
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (pending_map.get_always_on_modules().count(module) > 0) {
|
||||
ss << "module '" << module << "' is already enabled (always-on)";
|
||||
goto out;
|
||||
}
|
||||
string force;
|
||||
cmd_getval(g_ceph_context, cmdmap, "force", force);
|
||||
if (!pending_map.all_support_module(module) &&
|
||||
@ -958,6 +987,11 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op)
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (pending_map.get_always_on_modules().count(module) > 0) {
|
||||
ss << "module '" << module << "' cannot be disabled (always-on)";
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (!pending_map.module_enabled(module)) {
|
||||
ss << "module '" << module << "' is already disabled";
|
||||
r = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user