Merge pull request #31949 from dillaman/wip-42748

rbd: incorporate rbd-mirror daemon status in mirror pool status

Reviewed-by: Mykola Golub <mgolub@suse.com>
This commit is contained in:
Mykola Golub 2019-12-06 10:43:47 +02:00 committed by GitHub
commit d8600f9383
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 424 additions and 173 deletions

View File

@ -77,7 +77,7 @@ wait_for_pool_healthy()
for s in `seq 1 40`; do
test $s -ne 1 && sleep 30
state=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'health:' | cut -d' ' -f 2)
state=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'image health:' | cut -d' ' -f 3)
test "${state}" = "ERROR" && break
test "${state}" = "OK" && return 0
done

View File

@ -14,162 +14,293 @@
namespace rbd {
// Streams the human-readable name of a MirrorHealth value ("OK", "UNKNOWN",
// "WARNING" or "ERROR"). A value outside the enumerators writes nothing.
std::ostream& operator<<(std::ostream& os, MirrorHealth mirror_health) {
  const char* label = nullptr;
  switch (mirror_health) {
  case MIRROR_HEALTH_OK:
    label = "OK";
    break;
  case MIRROR_HEALTH_UNKNOWN:
    label = "UNKNOWN";
    break;
  case MIRROR_HEALTH_WARNING:
    label = "WARNING";
    break;
  case MIRROR_HEALTH_ERROR:
    label = "ERROR";
    break;
  }

  // only emit output when the value matched one of the known enumerators
  if (label != nullptr) {
    os << label;
  }
  return os;
}
// Builds the one-line service description shown next to an image status:
// the daemon's client id (or the service id when no client id is known),
// followed by " on <hostname>" when a hostname is known.
std::string MirrorService::get_image_description() const {
  std::string label;
  if (client_id.empty()) {
    // no client id was recorded; fall back to the service id
    label = stringify(service_id);
  } else {
    label = client_id;
  }

  if (!hostname.empty()) {
    label.append(" on ").append(hostname);
  }
  return label;
}
// Emits a "daemon_service" object section describing this service through
// the supplied formatter. Fields are written in a fixed order: service_id,
// instance_id, daemon_id (the client id) and hostname.
void MirrorService::dump_image(
    argument_types::Format::Formatter formatter) const {
  struct Field {
    const char* name;
    const std::string* value;
  };
  const Field fields[] = {
    {"service_id", &service_id},
    {"instance_id", &instance_id},
    {"daemon_id", &client_id},
    {"hostname", &hostname},
  };

  formatter->open_object_section("daemon_service");
  for (const auto& field : fields) {
    formatter->dump_string(field.name, *field.value);
  }
  formatter->close_section();
}
// Loads the rbd-mirror daemon information for the pool.
//
// First collects the registered services via get_mirror_service_dump(); if
// that yields no services there is nothing to query status for and the call
// succeeds immediately. Otherwise the per-daemon status is collected via
// get_mirror_service_status().
//
// Returns 0 on success or the negative error code of the failing step.
int MirrorDaemonServiceInfo::init() {
  int r = get_mirror_service_dump();
  if (r < 0) {
    return r;
  }
  if (m_mirror_services.empty()) {
    // no rbd-mirror services were discovered; skip the status query
    return 0;
  }

  r = get_mirror_service_status();
  if (r < 0) {
    return r;
  }
  return 0;
}
// Looks up a mirror service by its service id.
// Returns a pointer into the internal service map, or nullptr when the
// service id is not known.
const MirrorService* MirrorDaemonServiceInfo::get_by_service_id(
    const std::string& service_id) const {
  const auto found = m_mirror_services.find(service_id);
  return (found != m_mirror_services.end()) ? &found->second : nullptr;
}
// Looks up a mirror service by instance id: the instance id is first
// translated to a service id, which is then resolved via
// get_by_service_id(). Returns nullptr when the instance id is not mapped
// (or when the mapped service id is unknown).
const MirrorService* MirrorDaemonServiceInfo::get_by_instance_id(
    const std::string& instance_id) const {
  const auto mapping = m_instance_to_service_ids.find(instance_id);
  if (mapping != m_instance_to_service_ids.end()) {
    return get_by_service_id(mapping->second);
  }
  return nullptr;
}
// Returns a copy of every known mirror service, in the iteration order of
// the internal service map (i.e. ordered by service id key).
MirrorServices MirrorDaemonServiceInfo::get_mirror_services() const {
  MirrorServices result;
  for (auto entry = m_mirror_services.begin();
       entry != m_mirror_services.end(); ++entry) {
    result.push_back(entry->second);
  }
  return result;
}
int MirrorDaemonServiceInfo::get_mirror_service_dump() {
librados::Rados rados(m_io_ctx);
std::string cmd = R"({"prefix": "service dump", "format": "json"})";
bufferlist in_bl;
bufferlist out_bl;
int r = librados::Rados(m_io_ctx).mgr_command(cmd, in_bl, &out_bl, nullptr);
int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
if (r < 0) {
std::cerr << "rbd: failed to get service dump: " << cpp_strerror(r)
std::cerr << "rbd: failed to query services: " << cpp_strerror(r)
<< std::endl;
return r;
}
bool json_valid = false;
json_spirit::mValue json_root;
if (json_spirit::read(out_bl.to_str(), json_root)) {
try {
auto& json_obj = json_root.get_obj();
if (json_obj.count("services")) {
auto &services = json_obj["services"].get_obj();
if (services.count("rbd-mirror")) {
auto &mirror_service = services["rbd-mirror"].get_obj();
if (mirror_service.count("daemons")) {
for (auto &it : mirror_service["daemons"].get_obj()) {
if (it.second.type() != json_spirit::obj_type ||
!it.second.get_obj().count("metadata")) {
continue;
}
auto &service_id = it.first;
auto &daemon_metadata = it.second.get_obj()["metadata"].get_obj();
for (auto &iter : daemon_metadata) {
if (iter.second.type() != json_spirit::str_type) {
continue;
}
m_daemons_metadata[service_id][iter.first] = iter.second.get_str();
}
}
}
}
}
json_valid = true;
} catch (std::runtime_error&) {
}
}
if (!json_valid) {
std::cerr << "rbd: failed to parse service status" << std::endl;
if(!json_spirit::read(out_bl.to_str(), json_root)) {
std::cerr << "rbd: invalid service dump JSON received" << std::endl;
return -EBADMSG;
}
cmd = "{\"prefix\": \"service status\"}";
out_bl.clear();
r = librados::Rados(m_io_ctx).mgr_command(cmd, in_bl, &out_bl, nullptr);
if (r < 0) {
std::cerr << "rbd: failed to get service status: " << cpp_strerror(r)
<< std::endl;
return r;
}
json_valid = false;
if (json_spirit::read(out_bl.to_str(), json_root)) {
try {
auto& json_obj = json_root.get_obj();
if (json_obj.count("rbd-mirror")) {
auto &mirror_service = json_obj["rbd-mirror"].get_obj();
for (auto &it : mirror_service) {
auto &service_id = it.first;
auto &daemon = it.second.get_obj();
if (daemon.count("status") &&
daemon["status"].get_obj().count("json")) {
auto& status_json_str =
daemon["status"].get_obj()["json"].get_str();
json_spirit::mValue status_json_root;
if (json_spirit::read(status_json_str, status_json_root)) {
auto& status = status_json_root.get_obj();
auto iter = status.find(stringify(m_io_ctx.get_id()));
if (iter != status.end() &&
iter->second.get_obj().count("instance_id")) {
auto &instance_id =
iter->second.get_obj()["instance_id"].get_str();
m_instance_id_to_service_id[instance_id] = service_id;
}
}
}
}
}
json_valid = true;
} catch (std::runtime_error&) {
try {
auto& services = json_root.get_obj()["services"];
if (services.is_null()) {
std::cerr << "rbd: missing services in service dump JSON" << std::endl;
return -EBADMSG;
}
}
if (!json_valid) {
std::cerr << "rbd: failed to parse service status" << std::endl;
auto& service = services.get_obj()["rbd-mirror"];
if (service.is_null()) {
// no rbd-mirror daemons running
return 0;
}
auto& daemons = service.get_obj()["daemons"];
if (daemons.is_null()) {
return 0;
}
for (auto& daemon_pair : daemons.get_obj()) {
// rbd-mirror instances will always be integers but other objects
// are included
auto& service_id = daemon_pair.first;
if (daemon_pair.second.type() != json_spirit::obj_type) {
continue;
}
auto& daemon = daemon_pair.second.get_obj();
auto& metadata_val = daemon["metadata"];
if (metadata_val.is_null()) {
continue;
}
auto& metadata = metadata_val.get_obj();
MirrorService mirror_service{service_id};
auto& client_id = metadata["id"];
if (!client_id.is_null()) {
mirror_service.client_id = client_id.get_str();
}
auto& ceph_version = metadata["ceph_version_short"];
if (!ceph_version.is_null()) {
mirror_service.ceph_version = ceph_version.get_str();
}
auto& hostname = metadata["hostname"];
if (!hostname.is_null()) {
mirror_service.hostname = hostname.get_str();
}
m_mirror_services[service_id] = mirror_service;
}
} catch (std::runtime_error&) {
std::cerr << "rbd: unexpected service dump JSON received" << std::endl;
return -EBADMSG;
}
return 0;
}
std::string MirrorDaemonServiceInfo::get_description(
const std::string &instance_id) const {
if (!m_instance_id_to_service_id.count(instance_id)) {
return {};
int MirrorDaemonServiceInfo::get_mirror_service_status() {
librados::Rados rados(m_io_ctx);
std::string cmd = R"({"prefix": "service status", "format": "json"})";
bufferlist in_bl;
bufferlist out_bl;
int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
if (r < 0) {
std::cerr << "rbd: failed to query service status: " << cpp_strerror(r)
<< std::endl;
return r;
}
json_spirit::mValue json_root;
if(!json_spirit::read(out_bl.to_str(), json_root)) {
std::cerr << "rbd: invalid service status JSON received" << std::endl;
return -EBADMSG;
}
auto service_id = m_instance_id_to_service_id.find(instance_id)->second;
bool found_leader = false;
bool found_pool = false;
auto it = m_daemons_metadata.find(service_id);
if (it == m_daemons_metadata.end()) {
return service_id;
try {
auto& service = json_root.get_obj()["rbd-mirror"];
if (service.is_null()) {
return 0;
}
for (auto& daemon_pair : service.get_obj()) {
std::string service_id = daemon_pair.first;
auto it = m_mirror_services.find(service_id);
if (it == m_mirror_services.end()) {
continue;
}
auto& mirror_service = it->second;
auto& daemon = daemon_pair.second.get_obj();
auto& status = daemon["status"];
if (status.is_null()) {
mirror_service.callouts.push_back("not reporting status");
mirror_service.health = MIRROR_HEALTH_WARNING;
continue;
}
auto& json = status.get_obj()["json"];
if (json.is_null()) {
mirror_service.callouts.push_back("not reporting status");
mirror_service.health = MIRROR_HEALTH_WARNING;
continue;
}
json_spirit::mValue json_status;
if(!json_spirit::read(json.get_str(), json_status)) {
std::cerr << "rbd: invalid service status daemon status JSON received"
<< std::endl;
return -EBADMSG;
}
auto& pool_val = json_status.get_obj()[stringify(m_io_ctx.get_id())];
if (pool_val.is_null()) {
mirror_service.callouts.push_back("not reporting status for pool");
mirror_service.health = MIRROR_HEALTH_WARNING;
continue;
}
auto& pool = pool_val.get_obj();
found_pool = true;
auto& instance_id = pool["instance_id"];
if (!instance_id.is_null()) {
mirror_service.instance_id = instance_id.get_str();
m_instance_to_service_ids[mirror_service.instance_id] = service_id;
}
auto& leader = pool["leader"];
if (!leader.is_null() && leader.get_bool()) {
mirror_service.leader = true;
found_leader = true;
}
MirrorHealth mirror_service_health = MIRROR_HEALTH_OK;
auto& callouts = pool["callouts"];
if (!callouts.is_null()) {
for (auto& callout_pair : callouts.get_obj()) {
auto& callout = callout_pair.second.get_obj();
auto& level = callout["level"];
if (level.is_null()) {
continue;
}
auto& level_str = level.get_str();
if (mirror_service_health < MIRROR_HEALTH_ERROR &&
level_str == "error") {
mirror_service_health = MIRROR_HEALTH_ERROR;
} else if (mirror_service_health < MIRROR_HEALTH_WARNING &&
level_str == "warning") {
mirror_service_health = MIRROR_HEALTH_WARNING;
}
auto& text = callout["text"];
if (!text.is_null()) {
mirror_service.callouts.push_back(text.get_str());
}
}
}
mirror_service.health = mirror_service_health;
}
} catch (std::runtime_error&) {
std::cerr << "rbd: unexpected service status JSON received" << std::endl;
return -EBADMSG;
}
auto &metadata = it->second;
auto iter = metadata.find("id");
std::string description = (iter != metadata.end()) ?
iter->second : service_id;
iter = metadata.find("hostname");
if (iter != metadata.end()) {
description += " on " + iter->second;
// compute overall daemon health
m_daemon_health = MIRROR_HEALTH_OK;
if (!found_pool) {
// no daemons are reporting status for this pool
m_daemon_health = MIRROR_HEALTH_ERROR;
} else if (!found_leader) {
// no daemons are reporting leader role for this pool
m_daemon_health = MIRROR_HEALTH_WARNING;
}
return description;
}
void MirrorDaemonServiceInfo::dump(
const std::string &instance_id,
argument_types::Format::Formatter formatter) const {
formatter->open_object_section("daemon_service");
BOOST_SCOPE_EXIT(formatter) {
formatter->close_section();
} BOOST_SCOPE_EXIT_END;
if (instance_id.empty() ||
!m_instance_id_to_service_id.count(instance_id)) {
return;
for (auto& pair : m_mirror_services) {
m_daemon_health = std::max(m_daemon_health, pair.second.health);
}
auto service_id = m_instance_id_to_service_id.find(instance_id)->second;
formatter->dump_string("service_id", service_id);
formatter->dump_string("instance_id", instance_id);
auto it = m_daemons_metadata.find(service_id);
if (it == m_daemons_metadata.end()) {
return;
}
auto &metadata = it->second;
auto iter = metadata.find("id");
if (iter != metadata.end()) {
formatter->dump_string("daemon_id", iter->second);
}
iter = metadata.find("hostname");
if (iter != metadata.end()) {
formatter->dump_string("hostname", iter->second);
}
return 0;
}
} // namespace rbd

View File

@ -7,11 +7,44 @@
#include "include/rados/librados_fwd.hpp"
#include "tools/rbd/ArgumentTypes.h"
#include <string>
#include <iosfwd>
#include <list>
#include <map>
#include <string>
namespace rbd {
// Health level reported for mirroring (per daemon, per pool images, and
// overall). The numeric values are ordered by increasing severity; code in
// this change combines levels with std::max() and compares them with '<', so
// the ordering is significant. Note that UNKNOWN ranks above OK but below
// WARNING.
enum MirrorHealth {
MIRROR_HEALTH_OK = 0,
MIRROR_HEALTH_UNKNOWN = 1,
MIRROR_HEALTH_WARNING = 2,
MIRROR_HEALTH_ERROR = 3
};
std::ostream& operator<<(std::ostream& os, MirrorHealth mirror_health);
// Describes a single rbd-mirror daemon as seen through the cluster's
// service records, together with the health computed for it.
struct MirrorService {
  MirrorService() = default;
  explicit MirrorService(const std::string& service_id)
    : service_id(service_id) {
  }

  // key of the daemon in the service map
  std::string service_id;
  // instance id the daemon reports for the pool (empty if not reported)
  std::string instance_id;
  // true when the daemon reports itself as leader for the pool
  bool leader = false;

  // daemon metadata: "id", "ceph_version_short" and "hostname" entries
  std::string client_id;
  std::string ceph_version;
  std::string hostname;

  // human-readable notes explaining a non-OK health
  std::list<std::string> callouts;

  // stays UNKNOWN until a status has been evaluated for the daemon
  MirrorHealth health = MIRROR_HEALTH_UNKNOWN;

  // "<client id or service id>[ on <hostname>]"
  std::string get_image_description() const;
  // writes a "daemon_service" section describing this service
  void dump_image(argument_types::Format::Formatter formatter) const;
};

using MirrorServices = std::list<MirrorService>;
class MirrorDaemonServiceInfo {
public:
MirrorDaemonServiceInfo(librados::IoCtx &io_ctx) : m_io_ctx(io_ctx) {
@ -19,14 +52,25 @@ public:
int init();
std::string get_description(const std::string &instance_id) const;
void dump(const std::string &instance_id,
argument_types::Format::Formatter formatter) const;
const MirrorService* get_by_service_id(const std::string& service_id) const;
const MirrorService* get_by_instance_id(const std::string& instance_id) const;
MirrorServices get_mirror_services() const;
MirrorHealth get_daemon_health() const {
return m_daemon_health;
}
private:
librados::IoCtx &m_io_ctx;
std::map<std::string, std::string> m_instance_id_to_service_id;
std::map<std::string, std::map<std::string, std::string>> m_daemons_metadata;
std::map<std::string, MirrorService> m_mirror_services;
std::map<std::string, std::string> m_instance_to_service_ids;
MirrorHealth m_daemon_health = MIRROR_HEALTH_UNKNOWN;
int get_mirror_service_dump();
int get_mirror_service_status();
};
} // namespace rbd

View File

@ -320,6 +320,7 @@ int execute_status(const po::variables_map &vm,
}
}
auto mirror_service = daemon_service_info.get_by_instance_id(instance_id);
if (formatter != nullptr) {
formatter->open_object_section("image");
formatter->dump_string("name", image_name);
@ -328,7 +329,9 @@ int execute_status(const po::variables_map &vm,
formatter->dump_string("state", utils::mirror_image_site_status_state(
local_status));
formatter->dump_string("description", local_status.description);
daemon_service_info.dump(instance_id, formatter);
if (mirror_service != nullptr) {
mirror_service->dump_image(formatter);
}
formatter->dump_string("last_update", utils::timestr(
local_status.last_update));
}
@ -360,9 +363,9 @@ int execute_status(const po::variables_map &vm,
std::cout << " state: " << utils::mirror_image_site_status_state(
local_status) << "\n"
<< " description: " << local_status.description << "\n";
if (!instance_id.empty()) {
if (mirror_service != nullptr) {
std::cout << " service: " <<
daemon_service_info.get_description(instance_id) << "\n";
mirror_service->get_image_description() << "\n";
}
std::cout << " last_update: " << utils::timestr(
local_status.last_update) << std::endl;

View File

@ -5,6 +5,7 @@
#include "tools/rbd/MirrorDaemonServiceInfo.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/buffer.h"
#include "include/Context.h"
#include "include/stringify.h"
#include "include/rbd/librbd.hpp"
@ -637,6 +638,7 @@ protected:
m_instance_ids.count(m_image_id)) ?
m_instance_ids.find(m_image_id)->second : "";
auto mirror_service = m_daemon_service_info.get_by_instance_id(instance_id);
if (m_formatter != nullptr) {
m_formatter->open_object_section("image");
m_formatter->dump_string("name", m_mirror_image_global_status.name);
@ -646,7 +648,9 @@ protected:
m_formatter->dump_string("state", utils::mirror_image_site_status_state(
local_status));
m_formatter->dump_string("description", local_status.description);
m_daemon_service_info.dump(instance_id, m_formatter);
if (mirror_service != nullptr) {
mirror_service->dump_image(m_formatter);
}
m_formatter->dump_string("last_update", utils::timestr(
local_status.last_update));
}
@ -679,9 +683,9 @@ protected:
std::cout << " state: " << utils::mirror_image_site_status_state(
local_status) << std::endl
<< " description: " << local_status.description << std::endl;
if (!instance_id.empty()) {
if (mirror_service != nullptr) {
std::cout << " service: " <<
m_daemon_service_info.get_description(instance_id) << std::endl;
mirror_service->get_image_description() << std::endl;
}
std::cout << " last_update: " << utils::timestr(
local_status.last_update) << std::endl;
@ -777,6 +781,36 @@ private:
};
// Fetches the per-state image count summary for the pool and derives an
// overall image health from it.
//
// @param io_ctx               pool I/O context to query
// @param total_images         incremented by the count of every state found
//                             (accumulated, not reset, so callers should
//                             zero it first)
// @param mirror_image_states  filled with the state -> image count summary
// @param mirror_image_health  set to OK, WARNING or ERROR on success
// @return 0 on success, negative error code if the summary query fails (in
//         which case mirror_image_health is left untouched)
int get_mirror_image_status(
    librados::IoCtx& io_ctx, uint32_t* total_images,
    std::map<librbd::mirror_image_status_state_t, int>* mirror_image_states,
    MirrorHealth* mirror_image_health) {
  librbd::RBD rbd;
  int r = rbd.mirror_image_status_summary(io_ctx, mirror_image_states);
  if (r < 0) {
    std::cerr << "rbd: failed to get status summary for mirrored images: "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  MirrorHealth health = MIRROR_HEALTH_OK;
  for (const auto& state_count : *mirror_image_states) {
    const auto state = state_count.first;
    if (state == MIRROR_IMAGE_STATUS_STATE_ERROR) {
      // any image in the error state makes the whole pool unhealthy
      health = MIRROR_HEALTH_ERROR;
    } else if (state != MIRROR_IMAGE_STATUS_STATE_REPLAYING &&
               state != MIRROR_IMAGE_STATUS_STATE_STOPPED &&
               health < MIRROR_HEALTH_WARNING) {
      // anything other than replaying/stopped is worth a warning
      health = MIRROR_HEALTH_WARNING;
    }
    *total_images += state_count.second;
  }
  *mirror_image_health = health;
  return 0;
}
} // anonymous namespace
void get_peer_bootstrap_create_arguments(po::options_description *positional,
@ -1414,50 +1448,45 @@ int execute_status(const po::variables_map &vm,
librbd::RBD rbd;
std::map<librbd::mirror_image_status_state_t, int> states;
r = rbd.mirror_image_status_summary(io_ctx, &states);
uint32_t total_images = 0;
std::map<librbd::mirror_image_status_state_t, int> mirror_image_states;
MirrorHealth mirror_image_health = MIRROR_HEALTH_UNKNOWN;
r = get_mirror_image_status(io_ctx, &total_images, &mirror_image_states,
&mirror_image_health);
if (r < 0) {
std::cerr << "rbd: failed to get status summary for mirrored images: "
<< cpp_strerror(r) << std::endl;
return r;
}
MirrorDaemonServiceInfo daemon_service_info(io_ctx);
r = daemon_service_info.init();
if (r < 0) {
return r;
}
MirrorHealth mirror_daemon_health = daemon_service_info.get_daemon_health();
auto mirror_services = daemon_service_info.get_mirror_services();
auto mirror_health = std::max(mirror_image_health, mirror_daemon_health);
if (formatter != nullptr) {
formatter->open_object_section("status");
}
enum Health {Ok = 0, Warning = 1, Error = 2} health = Ok;
const char *names[] = {"OK", "WARNING", "ERROR"};
int total = 0;
for (auto &it : states) {
auto &state = it.first;
if (health < Warning &&
(state != MIRROR_IMAGE_STATUS_STATE_REPLAYING &&
state != MIRROR_IMAGE_STATUS_STATE_STOPPED)) {
health = Warning;
}
if (health < Error &&
state == MIRROR_IMAGE_STATUS_STATE_ERROR) {
health = Error;
}
total += it.second;
}
if (formatter != nullptr) {
formatter->open_object_section("summary");
formatter->dump_string("health", names[health]);
formatter->dump_stream("health") << mirror_health;
formatter->dump_stream("daemon_health") << mirror_daemon_health;
formatter->dump_stream("image_health") << mirror_image_health;
formatter->open_object_section("states");
for (auto &it : states) {
for (auto &it : mirror_image_states) {
std::string state_name = utils::mirror_image_status_state(it.first);
formatter->dump_int(state_name.c_str(), it.second);
}
formatter->close_section(); // states
formatter->close_section(); // summary
} else {
std::cout << "health: " << names[health] << std::endl;
std::cout << "images: " << total << " total" << std::endl;
for (auto &it : states) {
std::cout << "health: " << mirror_health << std::endl;
std::cout << "daemon health: " << mirror_daemon_health << std::endl;
std::cout << "image health: " << mirror_image_health << std::endl;
std::cout << "images: " << total_images << " total" << std::endl;
for (auto &it : mirror_image_states) {
std::cout << " " << it.second << " "
<< utils::mirror_image_status_state(it.first) << std::endl;
}
@ -1466,6 +1495,53 @@ int execute_status(const po::variables_map &vm,
int ret = 0;
if (verbose) {
// dump per-daemon status
if (formatter != nullptr) {
formatter->open_array_section("daemons");
for (auto& mirror_service : mirror_services) {
formatter->open_object_section("daemon");
formatter->dump_string("service_id", mirror_service.service_id);
formatter->dump_string("instance_id", mirror_service.instance_id);
formatter->dump_string("client_id", mirror_service.client_id);
formatter->dump_string("hostname", mirror_service.hostname);
formatter->dump_string("ceph_version", mirror_service.ceph_version);
formatter->dump_bool("leader", mirror_service.leader);
formatter->dump_stream("health") << mirror_service.health;
if (!mirror_service.callouts.empty()) {
formatter->open_array_section("callouts");
for (auto& callout : mirror_service.callouts) {
formatter->dump_string("callout", callout);
}
formatter->close_section(); // callouts
}
formatter->close_section(); // daemon
}
formatter->close_section(); // daemons
} else {
std::cout << std::endl << "DAEMONS" << std::endl;
if (mirror_services.empty()) {
std::cout << " none" << std::endl;
}
for (auto& mirror_service : mirror_services) {
std::cout << "service " << mirror_service.service_id << ":"
<< std::endl
<< " instance_id: " << mirror_service.instance_id
<< std::endl
<< " client_id: " << mirror_service.client_id << std::endl
<< " hostname: " << mirror_service.hostname << std::endl
<< " version: " << mirror_service.ceph_version << std::endl
<< " leader: " << (mirror_service.leader ? "true" : "false")
<< std::endl
<< " health: " << mirror_service.health << std::endl;
if (!mirror_service.callouts.empty()) {
std::cout << " callouts: " << mirror_service.callouts << std::endl;
}
std::cout << std::endl;
}
std::cout << std::endl;
}
// dump per-image status
std::vector<librbd::mirror_peer_site_t> mirror_peers;
utils::get_mirror_peer_sites(io_ctx, &mirror_peers);
@ -1474,10 +1550,11 @@ int execute_status(const po::variables_map &vm,
if (formatter != nullptr) {
formatter->open_array_section("images");
} else {
std::cout << "IMAGES";
}
std::map<std::string, std::string> instance_ids;
MirrorDaemonServiceInfo daemon_service_info(io_ctx);
std::string start_image_id;
while (true) {
@ -1501,10 +1578,6 @@ int execute_status(const po::variables_map &vm,
start_image_id = ids.rbegin()->first;
}
if (!instance_ids.empty()) {
daemon_service_info.init();
}
ImageRequestGenerator<StatusImageRequest> generator(
io_ctx, instance_ids, mirror_peers, peer_fsid_to_name,
daemon_service_info, formatter);