mirror of
https://github.com/ceph/ceph
synced 2025-02-23 11:07:35 +00:00
mgr,osd: make osd_metric more popular
Signed-off-by: lvshanchun <lvshanchun@gmail.com>
This commit is contained in:
parent
5b6104a1f6
commit
714ffe0d5f
@ -732,7 +732,7 @@ if (WITH_MGR)
|
||||
mgr/DaemonServer.cc
|
||||
mgr/ClusterState.cc
|
||||
mgr/ActivePyModules.cc
|
||||
mgr/OSDHealthMetricCollector.cc
|
||||
mgr/DaemonHealthMetricCollector.cc
|
||||
mgr/StandbyPyModules.cc
|
||||
mgr/PyModule.cc
|
||||
mgr/PyModuleRegistry.cc
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "msg/Message.h"
|
||||
|
||||
#include "common/perf_counters.h"
|
||||
#include "osd/OSDHealthMetric.h"
|
||||
#include "mgr/DaemonHealthMetric.h"
|
||||
|
||||
class PerfCounterType
|
||||
{
|
||||
@ -98,7 +98,7 @@ public:
|
||||
// for service registration
|
||||
boost::optional<std::map<std::string,std::string>> daemon_status;
|
||||
|
||||
std::vector<OSDHealthMetric> osd_health_metrics;
|
||||
std::vector<DaemonHealthMetric> daemon_health_metrics;
|
||||
|
||||
// encode map<string,map<int32_t,string>> of current config
|
||||
bufferlist config_bl;
|
||||
@ -116,7 +116,7 @@ public:
|
||||
decode(daemon_status, p);
|
||||
}
|
||||
if (header.version >= 5) {
|
||||
decode(osd_health_metrics, p);
|
||||
decode(daemon_health_metrics, p);
|
||||
}
|
||||
if (header.version >= 6) {
|
||||
decode(config_bl, p);
|
||||
@ -131,7 +131,7 @@ public:
|
||||
encode(undeclare_types, payload);
|
||||
encode(service_name, payload);
|
||||
encode(daemon_status, payload);
|
||||
encode(osd_health_metrics, payload);
|
||||
encode(daemon_health_metrics, payload);
|
||||
encode(config_bl, payload);
|
||||
}
|
||||
|
||||
@ -150,8 +150,8 @@ public:
|
||||
if (daemon_status) {
|
||||
out << " status=" << daemon_status->size();
|
||||
}
|
||||
if (!osd_health_metrics.empty()) {
|
||||
out << " osd_metrics=" << osd_health_metrics.size();
|
||||
if (!daemon_health_metrics.empty()) {
|
||||
out << " daemon_metrics=" << daemon_health_metrics.size();
|
||||
}
|
||||
out << ")";
|
||||
}
|
||||
|
@ -6,37 +6,37 @@
|
||||
#include <cstdint>
|
||||
#include "include/denc.h"
|
||||
|
||||
enum class osd_metric : uint8_t {
|
||||
enum class daemon_metric : uint8_t {
|
||||
SLOW_OPS,
|
||||
PENDING_CREATING_PGS,
|
||||
NONE,
|
||||
};
|
||||
|
||||
union osd_metric_t {
|
||||
union daemon_metric_t {
|
||||
struct {
|
||||
uint32_t n1;
|
||||
uint32_t n2;
|
||||
};
|
||||
uint64_t n;
|
||||
osd_metric_t(uint32_t x, uint32_t y)
|
||||
daemon_metric_t(uint32_t x, uint32_t y)
|
||||
: n1(x), n2(y)
|
||||
{}
|
||||
osd_metric_t(uint64_t x = 0)
|
||||
daemon_metric_t(uint64_t x = 0)
|
||||
: n(x)
|
||||
{}
|
||||
};
|
||||
|
||||
class OSDHealthMetric
|
||||
class DaemonHealthMetric
|
||||
{
|
||||
public:
|
||||
OSDHealthMetric() = default;
|
||||
OSDHealthMetric(osd_metric type_, uint64_t n)
|
||||
DaemonHealthMetric() = default;
|
||||
DaemonHealthMetric(daemon_metric type_, uint64_t n)
|
||||
: type(type_), value(n)
|
||||
{}
|
||||
OSDHealthMetric(osd_metric type_, uint32_t n1, uint32_t n2)
|
||||
DaemonHealthMetric(daemon_metric type_, uint32_t n1, uint32_t n2)
|
||||
: type(type_), value(n1, n2)
|
||||
{}
|
||||
osd_metric get_type() const {
|
||||
daemon_metric get_type() const {
|
||||
return type;
|
||||
}
|
||||
uint64_t get_n() const {
|
||||
@ -48,14 +48,14 @@ public:
|
||||
uint32_t get_n2() const {
|
||||
return value.n2;
|
||||
}
|
||||
DENC(OSDHealthMetric, v, p) {
|
||||
DENC(DaemonHealthMetric, v, p) {
|
||||
DENC_START(1, 1, p);
|
||||
denc(v.type, p);
|
||||
denc(v.value.n, p);
|
||||
DENC_FINISH(p);
|
||||
}
|
||||
private:
|
||||
osd_metric type = osd_metric::NONE;
|
||||
osd_metric_t value;
|
||||
daemon_metric type = daemon_metric::NONE;
|
||||
daemon_metric_t value;
|
||||
};
|
||||
WRITE_CLASS_DENC(OSDHealthMetric)
|
||||
WRITE_CLASS_DENC(DaemonHealthMetric)
|
@ -2,64 +2,64 @@
|
||||
|
||||
#include "include/health.h"
|
||||
#include "include/types.h"
|
||||
#include "OSDHealthMetricCollector.h"
|
||||
#include "DaemonHealthMetricCollector.h"
|
||||
|
||||
|
||||
|
||||
ostream& operator<<(ostream& os,
|
||||
const OSDHealthMetricCollector::DaemonKey& daemon) {
|
||||
const DaemonHealthMetricCollector::DaemonKey& daemon) {
|
||||
return os << daemon.first << "." << daemon.second;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class SlowOps final : public OSDHealthMetricCollector {
|
||||
bool _is_relevant(osd_metric type) const override {
|
||||
return type == osd_metric::SLOW_OPS;
|
||||
class SlowOps final : public DaemonHealthMetricCollector {
|
||||
bool _is_relevant(daemon_metric type) const override {
|
||||
return type == daemon_metric::SLOW_OPS;
|
||||
}
|
||||
health_check_t& _get_check(health_check_map_t& cm) const override {
|
||||
return cm.get_or_add("SLOW_OPS", HEALTH_WARN, "");
|
||||
}
|
||||
bool _update(const DaemonKey& osd,
|
||||
const OSDHealthMetric& metric) override {
|
||||
bool _update(const DaemonKey& daemon,
|
||||
const DaemonHealthMetric& metric) override {
|
||||
auto num_slow = metric.get_n1();
|
||||
auto blocked_time = metric.get_n2();
|
||||
value.n1 += num_slow;
|
||||
value.n2 = std::max(value.n2, blocked_time);
|
||||
if (num_slow || blocked_time) {
|
||||
osds.push_back(osd);
|
||||
daemons.push_back(daemon);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
void _summarize(health_check_t& check) const override {
|
||||
if (osds.empty()) {
|
||||
if (daemons.empty()) {
|
||||
return;
|
||||
}
|
||||
static const char* fmt = "%1% slow ops, oldest one blocked for %2% sec";
|
||||
check.summary = boost::str(boost::format(fmt) % value.n1 % value.n2);
|
||||
ostringstream ss;
|
||||
if (osds.size() > 1) {
|
||||
ss << "osds " << osds << " have slow ops.";
|
||||
if (daemons.size() > 1) {
|
||||
ss << "daemons " << daemons << " have slow ops.";
|
||||
} else {
|
||||
ss << osds.front() << " has slow ops";
|
||||
ss << daemons.front() << " has slow ops";
|
||||
}
|
||||
check.detail.push_back(ss.str());
|
||||
}
|
||||
vector<DaemonKey> osds;
|
||||
vector<DaemonKey> daemons;
|
||||
};
|
||||
|
||||
|
||||
class PendingPGs final : public OSDHealthMetricCollector {
|
||||
bool _is_relevant(osd_metric type) const override {
|
||||
return type == osd_metric::PENDING_CREATING_PGS;
|
||||
class PendingPGs final : public DaemonHealthMetricCollector {
|
||||
bool _is_relevant(daemon_metric type) const override {
|
||||
return type == daemon_metric::PENDING_CREATING_PGS;
|
||||
}
|
||||
health_check_t& _get_check(health_check_map_t& cm) const override {
|
||||
return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, "");
|
||||
}
|
||||
bool _update(const DaemonKey& osd,
|
||||
const OSDHealthMetric& metric) override {
|
||||
const DaemonHealthMetric& metric) override {
|
||||
value.n += metric.get_n();
|
||||
if (metric.get_n()) {
|
||||
osds.push_back(osd);
|
||||
@ -87,15 +87,15 @@ class PendingPGs final : public OSDHealthMetricCollector {
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
unique_ptr<OSDHealthMetricCollector>
|
||||
OSDHealthMetricCollector::create(osd_metric m)
|
||||
unique_ptr<DaemonHealthMetricCollector>
|
||||
DaemonHealthMetricCollector::create(daemon_metric m)
|
||||
{
|
||||
switch (m) {
|
||||
case osd_metric::SLOW_OPS:
|
||||
return unique_ptr<OSDHealthMetricCollector>{new SlowOps};
|
||||
case osd_metric::PENDING_CREATING_PGS:
|
||||
return unique_ptr<OSDHealthMetricCollector>{new PendingPGs};
|
||||
case daemon_metric::SLOW_OPS:
|
||||
return unique_ptr<DaemonHealthMetricCollector>{new SlowOps};
|
||||
case daemon_metric::PENDING_CREATING_PGS:
|
||||
return unique_ptr<DaemonHealthMetricCollector>{new PendingPGs};
|
||||
default:
|
||||
return unique_ptr<OSDHealthMetricCollector>{};
|
||||
return unique_ptr<DaemonHealthMetricCollector>{};
|
||||
}
|
||||
}
|
32
src/mgr/DaemonHealthMetricCollector.h
Normal file
32
src/mgr/DaemonHealthMetricCollector.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "DaemonHealthMetric.h"
|
||||
#include "mon/health_check.h"
|
||||
|
||||
class DaemonHealthMetricCollector {
|
||||
public:
|
||||
using DaemonKey = std::pair<std::string, std::string>;
|
||||
static std::unique_ptr<DaemonHealthMetricCollector> create(daemon_metric m);
|
||||
void update(const DaemonKey& daemon, const DaemonHealthMetric& metric) {
|
||||
if (_is_relevant(metric.get_type())) {
|
||||
reported = _update(daemon, metric);
|
||||
}
|
||||
}
|
||||
void summarize(health_check_map_t& cm) {
|
||||
if (reported) {
|
||||
_summarize(_get_check(cm));
|
||||
}
|
||||
}
|
||||
virtual ~DaemonHealthMetricCollector() {}
|
||||
private:
|
||||
virtual bool _is_relevant(daemon_metric type) const = 0;
|
||||
virtual health_check_t& _get_check(health_check_map_t& cm) const = 0;
|
||||
virtual bool _update(const DaemonKey& daemon, const DaemonHealthMetric& metric) = 0;
|
||||
virtual void _summarize(health_check_t& check) const = 0;
|
||||
protected:
|
||||
daemon_metric_t value;
|
||||
bool reported = false;
|
||||
};
|
@ -20,7 +20,7 @@
|
||||
#include "json_spirit/json_spirit_writer.h"
|
||||
|
||||
#include "mgr/mgr_commands.h"
|
||||
#include "mgr/OSDHealthMetricCollector.h"
|
||||
#include "mgr/DaemonHealthMetricCollector.h"
|
||||
#include "mon/MonCommand.h"
|
||||
|
||||
#include "messages/MMgrOpen.h"
|
||||
@ -524,7 +524,7 @@ bool DaemonServer::handle_report(MMgrReport *m)
|
||||
}
|
||||
if (m->get_connection()->peer_is_osd()) {
|
||||
// only OSDs send health metrics to me for now
|
||||
daemon->osd_health_metrics = std::move(m->osd_health_metrics);
|
||||
daemon->daemon_health_metrics = std::move(m->daemon_health_metrics);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1701,13 +1701,13 @@ void DaemonServer::send_report()
|
||||
});
|
||||
|
||||
auto osds = daemon_state.get_by_service("osd");
|
||||
map<osd_metric, unique_ptr<OSDHealthMetricCollector>> accumulated;
|
||||
map<daemon_metric, unique_ptr<DaemonHealthMetricCollector>> accumulated;
|
||||
for (const auto& osd : osds) {
|
||||
Mutex::Locker l(osd.second->lock);
|
||||
for (const auto& metric : osd.second->osd_health_metrics) {
|
||||
for (const auto& metric : osd.second->daemon_health_metrics) {
|
||||
auto acc = accumulated.find(metric.get_type());
|
||||
if (acc == accumulated.end()) {
|
||||
auto collector = OSDHealthMetricCollector::create(metric.get_type());
|
||||
auto collector = DaemonHealthMetricCollector::create(metric.get_type());
|
||||
if (!collector) {
|
||||
derr << __func__ << " " << osd.first << "." << osd.second
|
||||
<< " sent me an unknown health metric: "
|
||||
|
@ -98,7 +98,7 @@ class DaemonState
|
||||
std::map<std::string, std::string> metadata;
|
||||
|
||||
// TODO: this can be generalized to other daemons
|
||||
std::vector<OSDHealthMetric> osd_health_metrics;
|
||||
std::vector<DaemonHealthMetric> daemon_health_metrics;
|
||||
|
||||
// Ephemeral state
|
||||
bool service_daemon = false;
|
||||
|
@ -329,7 +329,7 @@ void MgrClient::send_report()
|
||||
daemon_dirty_status = false;
|
||||
}
|
||||
|
||||
report->osd_health_metrics = std::move(osd_health_metrics);
|
||||
report->daemon_health_metrics = std::move(daemon_health_metrics);
|
||||
|
||||
cct->_conf->get_config_bl(last_config_bl_version, &report->config_bl,
|
||||
&last_config_bl_version);
|
||||
@ -476,7 +476,8 @@ int MgrClient::service_daemon_update_status(
|
||||
return 0;
|
||||
}
|
||||
|
||||
void MgrClient::update_osd_health(std::vector<OSDHealthMetric>&& metrics)
|
||||
void MgrClient::update_daemon_health(std::vector<DaemonHealthMetric>&& metrics)
|
||||
{
|
||||
osd_health_metrics = std::move(metrics);
|
||||
daemon_health_metrics = std::move(metrics);
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include "msg/Connection.h"
|
||||
#include "msg/Dispatcher.h"
|
||||
#include "mon/MgrMap.h"
|
||||
#include "osd/OSDHealthMetric.h"
|
||||
#include "mgr/DaemonHealthMetric.h"
|
||||
|
||||
#include "common/perf_counters.h"
|
||||
#include "common/Timer.h"
|
||||
@ -81,7 +81,7 @@ protected:
|
||||
std::string service_name, daemon_name;
|
||||
std::map<std::string,std::string> daemon_metadata;
|
||||
std::map<std::string,std::string> daemon_status;
|
||||
std::vector<OSDHealthMetric> osd_health_metrics;
|
||||
std::vector<DaemonHealthMetric> daemon_health_metrics;
|
||||
|
||||
void reconnect();
|
||||
void _send_open();
|
||||
@ -120,7 +120,7 @@ public:
|
||||
const std::map<std::string,std::string>& metadata);
|
||||
int service_daemon_update_status(
|
||||
std::map<std::string,std::string>&& status);
|
||||
void update_osd_health(std::vector<OSDHealthMetric>&& metrics);
|
||||
void update_daemon_health(std::vector<DaemonHealthMetric>&& metrics);
|
||||
|
||||
private:
|
||||
void send_stats();
|
||||
|
@ -1,30 +0,0 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "osd/OSDHealthMetric.h"
|
||||
#include "mon/health_check.h"
|
||||
|
||||
class OSDHealthMetricCollector {
|
||||
public:
|
||||
using DaemonKey = std::pair<std::string, std::string>;
|
||||
static std::unique_ptr<OSDHealthMetricCollector> create(osd_metric m);
|
||||
void update(const DaemonKey& osd, const OSDHealthMetric& metric) {
|
||||
if (_is_relevant(metric.get_type())) {
|
||||
reported = _update(osd, metric);
|
||||
}
|
||||
}
|
||||
void summarize(health_check_map_t& cm) {
|
||||
if (reported) {
|
||||
_summarize(_get_check(cm));
|
||||
}
|
||||
}
|
||||
virtual ~OSDHealthMetricCollector() {}
|
||||
private:
|
||||
virtual bool _is_relevant(osd_metric type) const = 0;
|
||||
virtual health_check_t& _get_check(health_check_map_t& cm) const = 0;
|
||||
virtual bool _update(const DaemonKey& osd, const OSDHealthMetric& metric) = 0;
|
||||
virtual void _summarize(health_check_t& check) const = 0;
|
||||
protected:
|
||||
osd_metric_t value;
|
||||
bool reported = false;
|
||||
};
|
@ -4985,7 +4985,7 @@ void OSD::tick_without_osd_lock()
|
||||
}
|
||||
}
|
||||
|
||||
mgrc.update_osd_health(get_health_metrics());
|
||||
mgrc.update_daemon_health(get_health_metrics());
|
||||
service.kick_recovery_queue();
|
||||
tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, new C_Tick_WithoutOSDLock(this));
|
||||
}
|
||||
@ -7065,9 +7065,9 @@ MPGStats* OSD::collect_pg_stats()
|
||||
return m;
|
||||
}
|
||||
|
||||
vector<OSDHealthMetric> OSD::get_health_metrics()
|
||||
vector<DaemonHealthMetric> OSD::get_health_metrics()
|
||||
{
|
||||
vector<OSDHealthMetric> metrics;
|
||||
vector<DaemonHealthMetric> metrics;
|
||||
{
|
||||
utime_t oldest_secs;
|
||||
const utime_t now = ceph_clock_now();
|
||||
@ -7083,10 +7083,10 @@ vector<OSDHealthMetric> OSD::get_health_metrics()
|
||||
}
|
||||
};
|
||||
if (op_tracker.visit_ops_in_flight(&oldest_secs, count_slow_ops)) {
|
||||
metrics.emplace_back(osd_metric::SLOW_OPS, slow, oldest_secs);
|
||||
metrics.emplace_back(daemon_metric::SLOW_OPS, slow, oldest_secs);
|
||||
} else {
|
||||
// no news is not good news.
|
||||
metrics.emplace_back(osd_metric::SLOW_OPS, 0, 0);
|
||||
metrics.emplace_back(daemon_metric::SLOW_OPS, 0, 0);
|
||||
}
|
||||
}
|
||||
with_unique_lock(pending_creates_lock, [&]() {
|
||||
@ -7096,7 +7096,7 @@ vector<OSDHealthMetric> OSD::get_health_metrics()
|
||||
n_primaries++;
|
||||
}
|
||||
}
|
||||
metrics.emplace_back(osd_metric::PENDING_CREATING_PGS, n_primaries);
|
||||
metrics.emplace_back(daemon_metric::PENDING_CREATING_PGS, n_primaries);
|
||||
});
|
||||
return metrics;
|
||||
}
|
||||
|
@ -2091,7 +2091,7 @@ protected:
|
||||
|
||||
// -- status reporting --
|
||||
MPGStats *collect_pg_stats();
|
||||
std::vector<OSDHealthMetric> get_health_metrics();
|
||||
std::vector<DaemonHealthMetric> get_health_metrics();
|
||||
|
||||
private:
|
||||
bool ms_can_fast_dispatch_any() const override { return true; }
|
||||
|
Loading…
Reference in New Issue
Block a user