mgr,osd: make osd_metric more generic

Signed-off-by: lvshanchun <lvshanchun@gmail.com>
This commit is contained in:
lvshanchun 2018-03-13 14:41:14 +08:00
parent 5b6104a1f6
commit 714ffe0d5f
12 changed files with 96 additions and 93 deletions

View File

@ -732,7 +732,7 @@ if (WITH_MGR)
mgr/DaemonServer.cc
mgr/ClusterState.cc
mgr/ActivePyModules.cc
mgr/OSDHealthMetricCollector.cc
mgr/DaemonHealthMetricCollector.cc
mgr/StandbyPyModules.cc
mgr/PyModule.cc
mgr/PyModuleRegistry.cc

View File

@ -20,7 +20,7 @@
#include "msg/Message.h"
#include "common/perf_counters.h"
#include "osd/OSDHealthMetric.h"
#include "mgr/DaemonHealthMetric.h"
class PerfCounterType
{
@ -98,7 +98,7 @@ public:
// for service registration
boost::optional<std::map<std::string,std::string>> daemon_status;
std::vector<OSDHealthMetric> osd_health_metrics;
std::vector<DaemonHealthMetric> daemon_health_metrics;
// encode map<string,map<int32_t,string>> of current config
bufferlist config_bl;
@ -116,7 +116,7 @@ public:
decode(daemon_status, p);
}
if (header.version >= 5) {
decode(osd_health_metrics, p);
decode(daemon_health_metrics, p);
}
if (header.version >= 6) {
decode(config_bl, p);
@ -131,7 +131,7 @@ public:
encode(undeclare_types, payload);
encode(service_name, payload);
encode(daemon_status, payload);
encode(osd_health_metrics, payload);
encode(daemon_health_metrics, payload);
encode(config_bl, payload);
}
@ -150,8 +150,8 @@ public:
if (daemon_status) {
out << " status=" << daemon_status->size();
}
if (!osd_health_metrics.empty()) {
out << " osd_metrics=" << osd_health_metrics.size();
if (!daemon_health_metrics.empty()) {
out << " daemon_metrics=" << daemon_health_metrics.size();
}
out << ")";
}

View File

@ -6,37 +6,37 @@
#include <cstdint>
#include "include/denc.h"
enum class osd_metric : uint8_t {
enum class daemon_metric : uint8_t {
SLOW_OPS,
PENDING_CREATING_PGS,
NONE,
};
union osd_metric_t {
union daemon_metric_t {
struct {
uint32_t n1;
uint32_t n2;
};
uint64_t n;
osd_metric_t(uint32_t x, uint32_t y)
daemon_metric_t(uint32_t x, uint32_t y)
: n1(x), n2(y)
{}
osd_metric_t(uint64_t x = 0)
daemon_metric_t(uint64_t x = 0)
: n(x)
{}
};
class OSDHealthMetric
class DaemonHealthMetric
{
public:
OSDHealthMetric() = default;
OSDHealthMetric(osd_metric type_, uint64_t n)
DaemonHealthMetric() = default;
DaemonHealthMetric(daemon_metric type_, uint64_t n)
: type(type_), value(n)
{}
OSDHealthMetric(osd_metric type_, uint32_t n1, uint32_t n2)
DaemonHealthMetric(daemon_metric type_, uint32_t n1, uint32_t n2)
: type(type_), value(n1, n2)
{}
osd_metric get_type() const {
daemon_metric get_type() const {
return type;
}
uint64_t get_n() const {
@ -48,14 +48,14 @@ public:
uint32_t get_n2() const {
return value.n2;
}
DENC(OSDHealthMetric, v, p) {
DENC(DaemonHealthMetric, v, p) {
DENC_START(1, 1, p);
denc(v.type, p);
denc(v.value.n, p);
DENC_FINISH(p);
}
private:
osd_metric type = osd_metric::NONE;
osd_metric_t value;
daemon_metric type = daemon_metric::NONE;
daemon_metric_t value;
};
WRITE_CLASS_DENC(OSDHealthMetric)
WRITE_CLASS_DENC(DaemonHealthMetric)

View File

@ -2,64 +2,64 @@
#include "include/health.h"
#include "include/types.h"
#include "OSDHealthMetricCollector.h"
#include "DaemonHealthMetricCollector.h"
ostream& operator<<(ostream& os,
const OSDHealthMetricCollector::DaemonKey& daemon) {
const DaemonHealthMetricCollector::DaemonKey& daemon) {
return os << daemon.first << "." << daemon.second;
}
namespace {
class SlowOps final : public OSDHealthMetricCollector {
bool _is_relevant(osd_metric type) const override {
return type == osd_metric::SLOW_OPS;
class SlowOps final : public DaemonHealthMetricCollector {
bool _is_relevant(daemon_metric type) const override {
return type == daemon_metric::SLOW_OPS;
}
health_check_t& _get_check(health_check_map_t& cm) const override {
return cm.get_or_add("SLOW_OPS", HEALTH_WARN, "");
}
bool _update(const DaemonKey& osd,
const OSDHealthMetric& metric) override {
bool _update(const DaemonKey& daemon,
const DaemonHealthMetric& metric) override {
auto num_slow = metric.get_n1();
auto blocked_time = metric.get_n2();
value.n1 += num_slow;
value.n2 = std::max(value.n2, blocked_time);
if (num_slow || blocked_time) {
osds.push_back(osd);
daemons.push_back(daemon);
return true;
} else {
return false;
}
}
void _summarize(health_check_t& check) const override {
if (osds.empty()) {
if (daemons.empty()) {
return;
}
static const char* fmt = "%1% slow ops, oldest one blocked for %2% sec";
check.summary = boost::str(boost::format(fmt) % value.n1 % value.n2);
ostringstream ss;
if (osds.size() > 1) {
ss << "osds " << osds << " have slow ops.";
if (daemons.size() > 1) {
ss << "daemons " << daemons << " have slow ops.";
} else {
ss << osds.front() << " has slow ops";
ss << daemons.front() << " has slow ops";
}
check.detail.push_back(ss.str());
}
vector<DaemonKey> osds;
vector<DaemonKey> daemons;
};
class PendingPGs final : public OSDHealthMetricCollector {
bool _is_relevant(osd_metric type) const override {
return type == osd_metric::PENDING_CREATING_PGS;
class PendingPGs final : public DaemonHealthMetricCollector {
bool _is_relevant(daemon_metric type) const override {
return type == daemon_metric::PENDING_CREATING_PGS;
}
health_check_t& _get_check(health_check_map_t& cm) const override {
return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, "");
}
bool _update(const DaemonKey& osd,
const OSDHealthMetric& metric) override {
const DaemonHealthMetric& metric) override {
value.n += metric.get_n();
if (metric.get_n()) {
osds.push_back(osd);
@ -87,15 +87,15 @@ class PendingPGs final : public OSDHealthMetricCollector {
} // anonymous namespace
unique_ptr<OSDHealthMetricCollector>
OSDHealthMetricCollector::create(osd_metric m)
unique_ptr<DaemonHealthMetricCollector>
DaemonHealthMetricCollector::create(daemon_metric m)
{
switch (m) {
case osd_metric::SLOW_OPS:
return unique_ptr<OSDHealthMetricCollector>{new SlowOps};
case osd_metric::PENDING_CREATING_PGS:
return unique_ptr<OSDHealthMetricCollector>{new PendingPGs};
case daemon_metric::SLOW_OPS:
return unique_ptr<DaemonHealthMetricCollector>{new SlowOps};
case daemon_metric::PENDING_CREATING_PGS:
return unique_ptr<DaemonHealthMetricCollector>{new PendingPGs};
default:
return unique_ptr<OSDHealthMetricCollector>{};
return unique_ptr<DaemonHealthMetricCollector>{};
}
}

View File

@ -0,0 +1,32 @@
#pragma once
#include <memory>
#include <string>
#include "DaemonHealthMetric.h"
#include "mon/health_check.h"
/// Accumulates one kind of daemon health metric across many daemons and
/// renders the accumulated result into a health check.
///
/// Concrete collectors implement the private virtual hooks; callers feed
/// metrics in through update() and finally call summarize().
class DaemonHealthMetricCollector {
public:
  /// Identifies the reporting daemon as a pair of strings
  /// (presumably service type and daemon name -- confirm against callers).
  using DaemonKey = std::pair<std::string, std::string>;

  /// Builds the collector that handles metric kind @p m.
  /// @return the collector, or an empty pointer if @p m has no collector.
  static std::unique_ptr<DaemonHealthMetricCollector> create(daemon_metric m);

  /// Feeds one metric from @p daemon into this collector.
  ///
  /// Metrics whose type this collector does not handle are ignored.
  /// @param daemon  the daemon that sent @p metric
  /// @param metric  the metric to accumulate
  void update(const DaemonKey& daemon, const DaemonHealthMetric& metric) {
    // The flag is sticky: once any daemon has contributed something worth
    // reporting, a later daemon with nothing to report must not clear it,
    // otherwise the outcome would depend on the order daemons are visited.
    if (_is_relevant(metric.get_type()) && _update(daemon, metric)) {
      reported = true;
    }
  }

  /// Writes the accumulated result into @p cm.
  ///
  /// Does nothing unless at least one update() call had something to report.
  /// @param cm  health check map that receives this collector's check
  void summarize(health_check_map_t& cm) {
    if (reported) {
      _summarize(_get_check(cm));
    }
  }

  virtual ~DaemonHealthMetricCollector() = default;

private:
  /// @return true if this collector handles metrics of kind @p type.
  virtual bool _is_relevant(daemon_metric type) const = 0;
  /// @return the health check in @p cm that this collector fills in.
  virtual health_check_t& _get_check(health_check_map_t& cm) const = 0;
  /// Accumulates @p metric.
  /// @return true if @p metric contributed something that should be reported.
  virtual bool _update(const DaemonKey& daemon, const DaemonHealthMetric& metric) = 0;
  /// Fills in @p check from the accumulated state.
  virtual void _summarize(health_check_t& check) const = 0;

protected:
  /// Running aggregate maintained by the subclass' _update().
  daemon_metric_t value;
  /// True once any relevant metric had something to report.
  bool reported = false;
};

View File

@ -20,7 +20,7 @@
#include "json_spirit/json_spirit_writer.h"
#include "mgr/mgr_commands.h"
#include "mgr/OSDHealthMetricCollector.h"
#include "mgr/DaemonHealthMetricCollector.h"
#include "mon/MonCommand.h"
#include "messages/MMgrOpen.h"
@ -524,7 +524,7 @@ bool DaemonServer::handle_report(MMgrReport *m)
}
if (m->get_connection()->peer_is_osd()) {
// only OSD sends health_checks to me now
daemon->osd_health_metrics = std::move(m->osd_health_metrics);
daemon->daemon_health_metrics = std::move(m->daemon_health_metrics);
}
}
@ -1701,13 +1701,13 @@ void DaemonServer::send_report()
});
auto osds = daemon_state.get_by_service("osd");
map<osd_metric, unique_ptr<OSDHealthMetricCollector>> accumulated;
map<daemon_metric, unique_ptr<DaemonHealthMetricCollector>> accumulated;
for (const auto& osd : osds) {
Mutex::Locker l(osd.second->lock);
for (const auto& metric : osd.second->osd_health_metrics) {
for (const auto& metric : osd.second->daemon_health_metrics) {
auto acc = accumulated.find(metric.get_type());
if (acc == accumulated.end()) {
auto collector = OSDHealthMetricCollector::create(metric.get_type());
auto collector = DaemonHealthMetricCollector::create(metric.get_type());
if (!collector) {
derr << __func__ << " " << osd.first << "." << osd.second
<< " sent me an unknown health metric: "

View File

@ -98,7 +98,7 @@ class DaemonState
std::map<std::string, std::string> metadata;
// TODO: this can be generalized to other daemons
std::vector<OSDHealthMetric> osd_health_metrics;
std::vector<DaemonHealthMetric> daemon_health_metrics;
// Ephemeral state
bool service_daemon = false;

View File

@ -329,7 +329,7 @@ void MgrClient::send_report()
daemon_dirty_status = false;
}
report->osd_health_metrics = std::move(osd_health_metrics);
report->daemon_health_metrics = std::move(daemon_health_metrics);
cct->_conf->get_config_bl(last_config_bl_version, &report->config_bl,
&last_config_bl_version);
@ -476,7 +476,8 @@ int MgrClient::service_daemon_update_status(
return 0;
}
void MgrClient::update_osd_health(std::vector<OSDHealthMetric>&& metrics)
void MgrClient::update_daemon_health(std::vector<DaemonHealthMetric>&& metrics)
{
osd_health_metrics = std::move(metrics);
daemon_health_metrics = std::move(metrics);
}

View File

@ -17,7 +17,7 @@
#include "msg/Connection.h"
#include "msg/Dispatcher.h"
#include "mon/MgrMap.h"
#include "osd/OSDHealthMetric.h"
#include "mgr/DaemonHealthMetric.h"
#include "common/perf_counters.h"
#include "common/Timer.h"
@ -81,7 +81,7 @@ protected:
std::string service_name, daemon_name;
std::map<std::string,std::string> daemon_metadata;
std::map<std::string,std::string> daemon_status;
std::vector<OSDHealthMetric> osd_health_metrics;
std::vector<DaemonHealthMetric> daemon_health_metrics;
void reconnect();
void _send_open();
@ -120,7 +120,7 @@ public:
const std::map<std::string,std::string>& metadata);
int service_daemon_update_status(
std::map<std::string,std::string>&& status);
void update_osd_health(std::vector<OSDHealthMetric>&& metrics);
void update_daemon_health(std::vector<DaemonHealthMetric>&& metrics);
private:
void send_stats();

View File

@ -1,30 +0,0 @@
#include <memory>
#include <string>
#include "osd/OSDHealthMetric.h"
#include "mon/health_check.h"
// Accumulates one kind of OSD health metric across daemons and renders the
// accumulated result into a health check. Subclasses supply the private
// virtual hooks below.
class OSDHealthMetricCollector {
public:
// Identifies the reporting daemon as a pair of strings
// (presumably service type and daemon name -- confirm against callers).
using DaemonKey = std::pair<std::string, std::string>;
// Builds the collector that handles metric kind m.
static std::unique_ptr<OSDHealthMetricCollector> create(osd_metric m);
// Feeds one metric from the given daemon into this collector; metrics of a
// type this collector does not handle are ignored.
void update(const DaemonKey& osd, const OSDHealthMetric& metric) {
if (_is_relevant(metric.get_type())) {
// NOTE(review): plain assignment means each relevant metric overwrites the
// flag, so only the most recent _update() result decides whether
// summarize() emits anything -- confirm this is intended.
reported = _update(osd, metric);
}
}
// Writes the accumulated result into cm, but only when the reported flag
// is set.
void summarize(health_check_map_t& cm) {
if (reported) {
_summarize(_get_check(cm));
}
}
virtual ~OSDHealthMetricCollector() {}
private:
// Returns true if this collector handles metrics of the given type.
virtual bool _is_relevant(osd_metric type) const = 0;
// Returns the health check in cm that this collector fills in.
virtual health_check_t& _get_check(health_check_map_t& cm) const = 0;
// Accumulates the metric; the return value is stored in `reported`.
virtual bool _update(const DaemonKey& osd, const OSDHealthMetric& metric) = 0;
// Fills in the given check from the accumulated state.
virtual void _summarize(health_check_t& check) const = 0;
protected:
// Running aggregate available to subclasses.
osd_metric_t value;
// Whether summarize() should emit a check; set by update().
bool reported = false;
};

View File

@ -4985,7 +4985,7 @@ void OSD::tick_without_osd_lock()
}
}
mgrc.update_osd_health(get_health_metrics());
mgrc.update_daemon_health(get_health_metrics());
service.kick_recovery_queue();
tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, new C_Tick_WithoutOSDLock(this));
}
@ -7065,9 +7065,9 @@ MPGStats* OSD::collect_pg_stats()
return m;
}
vector<OSDHealthMetric> OSD::get_health_metrics()
vector<DaemonHealthMetric> OSD::get_health_metrics()
{
vector<OSDHealthMetric> metrics;
vector<DaemonHealthMetric> metrics;
{
utime_t oldest_secs;
const utime_t now = ceph_clock_now();
@ -7083,10 +7083,10 @@ vector<OSDHealthMetric> OSD::get_health_metrics()
}
};
if (op_tracker.visit_ops_in_flight(&oldest_secs, count_slow_ops)) {
metrics.emplace_back(osd_metric::SLOW_OPS, slow, oldest_secs);
metrics.emplace_back(daemon_metric::SLOW_OPS, slow, oldest_secs);
} else {
// no news is not good news.
metrics.emplace_back(osd_metric::SLOW_OPS, 0, 0);
metrics.emplace_back(daemon_metric::SLOW_OPS, 0, 0);
}
}
with_unique_lock(pending_creates_lock, [&]() {
@ -7096,7 +7096,7 @@ vector<OSDHealthMetric> OSD::get_health_metrics()
n_primaries++;
}
}
metrics.emplace_back(osd_metric::PENDING_CREATING_PGS, n_primaries);
metrics.emplace_back(daemon_metric::PENDING_CREATING_PGS, n_primaries);
});
return metrics;
}

View File

@ -2091,7 +2091,7 @@ protected:
// -- status reporting --
MPGStats *collect_pg_stats();
std::vector<OSDHealthMetric> get_health_metrics();
std::vector<DaemonHealthMetric> get_health_metrics();
private:
bool ms_can_fast_dispatch_any() const override { return true; }