mirror of
https://github.com/ceph/ceph
synced 2025-02-23 19:17:37 +00:00
Merge pull request #48166 from rhcs-dashboard/fix-empty-pid-path-case
exporter: don't skip loop if pid path is empty
Reviewed-by: Pere Diaz Bou <pdiazbou@redhat.com>
This commit is contained in:
commit
ab17e4ba1f
@ -1,13 +1,4 @@
|
||||
#include "DaemonMetricCollector.h"
|
||||
#include "common/admin_socket_client.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/hostname.h"
|
||||
#include "common/perf_counters.h"
|
||||
#include "global/global_init.h"
|
||||
#include "global/global_context.h"
|
||||
#include "common/split.h"
|
||||
#include "include/common_fwd.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <boost/json/src.hpp>
|
||||
#include <chrono>
|
||||
@ -19,6 +10,16 @@
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "common/admin_socket_client.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/hostname.h"
|
||||
#include "common/perf_counters.h"
|
||||
#include "common/split.h"
|
||||
#include "global/global_context.h"
|
||||
#include "global/global_init.h"
|
||||
#include "include/common_fwd.h"
|
||||
#include "util.h"
|
||||
|
||||
#define dout_context g_ceph_context
|
||||
#define dout_subsys ceph_subsys_ceph_exporter
|
||||
|
||||
@ -86,48 +87,60 @@ void DaemonMetricCollector::dump_asok_metrics() {
|
||||
|
||||
std::vector<std::pair<std::string, int>> daemon_pids;
|
||||
|
||||
int failures = 0;
|
||||
bool sort = g_conf().get_val<bool>("exporter_sort_metrics");
|
||||
if (sort) {
|
||||
builder = std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
|
||||
builder =
|
||||
std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
|
||||
} else {
|
||||
builder = std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
|
||||
builder =
|
||||
std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
|
||||
}
|
||||
for (auto &[daemon_name, sock_client] : clients) {
|
||||
bool ok;
|
||||
sock_client.ping(&ok);
|
||||
if (!ok) {
|
||||
failures++;
|
||||
continue;
|
||||
}
|
||||
std::string perf_dump_response = asok_request(sock_client, "perf dump", daemon_name);
|
||||
std::string perf_dump_response =
|
||||
asok_request(sock_client, "perf dump", daemon_name);
|
||||
if (perf_dump_response.size() == 0) {
|
||||
failures++;
|
||||
continue;
|
||||
}
|
||||
std::string perf_schema_response = asok_request(sock_client, "perf schema", daemon_name);
|
||||
std::string perf_schema_response =
|
||||
asok_request(sock_client, "perf schema", daemon_name);
|
||||
if (perf_schema_response.size() == 0) {
|
||||
failures++;
|
||||
continue;
|
||||
}
|
||||
std::string config_show =
|
||||
asok_request(sock_client, "config show", daemon_name);
|
||||
if (config_show.size() == 0) {
|
||||
failures++;
|
||||
continue;
|
||||
}
|
||||
std::string config_show = asok_request(sock_client, "config show", daemon_name);
|
||||
json_object pid_file_json = boost::json::parse(config_show).as_object();
|
||||
std::string pid_path =
|
||||
boost_string_to_std(pid_file_json["pid_file"].as_string());
|
||||
boost_string_to_std(pid_file_json["pid_file"].as_string());
|
||||
std::string pid_str = read_file_to_string(pid_path);
|
||||
if (!pid_path.size()) {
|
||||
continue;
|
||||
dout(1) << "pid path is empty; process metrics won't be fetched for: "
|
||||
<< daemon_name << dendl;
|
||||
}
|
||||
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
|
||||
json_object dump = boost::json::parse(perf_dump_response).as_object();
|
||||
json_object schema = boost::json::parse(perf_schema_response).as_object();
|
||||
for (auto &perf : schema) {
|
||||
auto sv = perf.key();
|
||||
std::string perf_group = {sv.begin(), sv.end()};
|
||||
std::string perf_group = {perf.key().begin(), perf.key().end()};
|
||||
json_object perf_group_object = perf.value().as_object();
|
||||
for (auto &perf_counter : perf_group_object) {
|
||||
auto sv1 = perf_counter.key();
|
||||
std::string perf_name = {sv1.begin(), sv1.end()};
|
||||
std::string perf_name = {perf_counter.key().begin(),
|
||||
perf_counter.key().end()};
|
||||
json_object perf_info = perf_counter.value().as_object();
|
||||
auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
|
||||
if (perf_info["priority"].as_int64() <
|
||||
prio_limit) {
|
||||
if (perf_info["priority"].as_int64() < prio_limit) {
|
||||
continue;
|
||||
}
|
||||
std::string name = "ceph_" + perf_group + "_" + perf_name;
|
||||
@ -143,10 +156,12 @@ void DaemonMetricCollector::dump_asok_metrics() {
|
||||
}
|
||||
}
|
||||
}
|
||||
dout(10) << "Perf counters retrieved for " << clients.size() << " daemons." << dendl;
|
||||
dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
|
||||
<< clients.size() << " daemons." << dendl;
|
||||
// get time spent on this function
|
||||
timer.stop();
|
||||
std::string scrap_desc("Time spent scraping and transforming perfcounters to metrics");
|
||||
std::string scrap_desc(
|
||||
"Time spent scraping and transforming perf counters to metrics");
|
||||
labels_t scrap_labels;
|
||||
scrap_labels["host"] = quote(ceph_get_hostname());
|
||||
scrap_labels["function"] = quote(__FUNCTION__);
|
||||
@ -154,7 +169,10 @@ void DaemonMetricCollector::dump_asok_metrics() {
|
||||
"gauge", scrap_labels);
|
||||
|
||||
const std::lock_guard<std::mutex> lock(metrics_mutex);
|
||||
get_process_metrics(daemon_pids);
|
||||
// only fetch process metrics if at least one daemon has a non-empty pid path
|
||||
if (daemon_pids.size() != 0) {
|
||||
get_process_metrics(daemon_pids);
|
||||
}
|
||||
metrics = builder->dump();
|
||||
}
|
||||
|
||||
@ -181,7 +199,8 @@ struct pstat read_pid_stat(int pid) {
|
||||
return stat;
|
||||
}
|
||||
|
||||
void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids) {
|
||||
void DaemonMetricCollector::get_process_metrics(
|
||||
std::vector<std::pair<std::string, int>> daemon_pids) {
|
||||
std::string path("/proc");
|
||||
std::stringstream ss;
|
||||
for (auto &[daemon_name, pid] : daemon_pids) {
|
||||
@ -194,7 +213,7 @@ void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::strin
|
||||
double total_time_seconds = user_time + kernel_time;
|
||||
double uptime = std::stod(uptimes[0]);
|
||||
double elapsed_time = uptime - start_time_seconds;
|
||||
double idle_time = elapsed_time - total_time_seconds;
|
||||
double idle_time = elapsed_time - total_time_seconds;
|
||||
double usage = total_time_seconds * 100 / elapsed_time;
|
||||
|
||||
labels_t labels;
|
||||
@ -205,8 +224,8 @@ void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::strin
|
||||
"Number of major page faults of daemon", "counter", labels);
|
||||
add_metric(builder, stat.num_threads, "ceph_exporter_num_threads",
|
||||
"Number of threads used by daemon", "gauge", labels);
|
||||
add_metric(builder, usage, "ceph_exporter_cpu_usage", "CPU usage of a daemon",
|
||||
"gauge", labels);
|
||||
add_metric(builder, usage, "ceph_exporter_cpu_usage",
|
||||
"CPU usage of a daemon", "gauge", labels);
|
||||
|
||||
std::string cpu_time_desc = "Process time in kernel/user/idle mode";
|
||||
labels_t cpu_total_labels;
|
||||
@ -220,21 +239,22 @@ void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::strin
|
||||
cpu_total_labels["mode"] = quote("idle");
|
||||
add_metric(builder, idle_time, "ceph_exporter_cpu_total", cpu_time_desc,
|
||||
"counter", cpu_total_labels);
|
||||
add_metric(builder, stat.vm_size, "ceph_exporter_vm_size", "Virtual memory used in a daemon",
|
||||
"gauge", labels);
|
||||
add_metric(builder, stat.vm_size, "ceph_exporter_vm_size",
|
||||
"Virtual memory used in a daemon", "gauge", labels);
|
||||
add_metric(builder, stat.resident_size, "ceph_exporter_resident_size",
|
||||
"Resident memory in a daemon", "gauge", labels);
|
||||
}
|
||||
}
|
||||
|
||||
std::string DaemonMetricCollector::asok_request(AdminSocketClient &asok,
|
||||
std::string command, std::string daemon_name) {
|
||||
std::string command,
|
||||
std::string daemon_name) {
|
||||
std::string request("{\"prefix\": \"" + command + "\"}");
|
||||
std::string response;
|
||||
std::string err = asok.do_request(request, &response);
|
||||
if (err.length() > 0 || response.substr(0, 5) == "ERROR") {
|
||||
dout(1) << "command " << command << "failed for daemon " << daemon_name
|
||||
<< "with error: " << err << dendl;
|
||||
dout(1) << "command " << command << "failed for daemon " << daemon_name
|
||||
<< "with error: " << err << dendl;
|
||||
return "";
|
||||
}
|
||||
return response;
|
||||
@ -253,8 +273,9 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
|
||||
} else {
|
||||
labels["ceph_daemon"] = quote(daemon_name);
|
||||
if (daemon_name.find("rbd-mirror") != std::string::npos) {
|
||||
std::regex re("^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
|
||||
")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
|
||||
std::regex re(
|
||||
"^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
|
||||
")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
|
||||
std::smatch match;
|
||||
if (std::regex_search(daemon_name, match, re) == true) {
|
||||
new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
|
||||
@ -277,9 +298,9 @@ void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
|
||||
labels_t labels) {
|
||||
int64_t type = perf_info["type"].as_int64();
|
||||
std::string metric_type =
|
||||
boost_string_to_std(perf_info["metric_type"].as_string());
|
||||
boost_string_to_std(perf_info["metric_type"].as_string());
|
||||
std::string description =
|
||||
boost_string_to_std(perf_info["description"].as_string());
|
||||
boost_string_to_std(perf_info["description"].as_string());
|
||||
|
||||
if (type & PERFCOUNTER_LONGRUNAVG) {
|
||||
int64_t count = perf_values.as_object()["avgcount"].as_int64();
|
||||
@ -306,16 +327,15 @@ void DaemonMetricCollector::update_sockets() {
|
||||
std::string sock_dir = g_conf().get_val<std::string>("exporter_sock_dir");
|
||||
clients.clear();
|
||||
std::filesystem::path sock_path = sock_dir;
|
||||
if(!std::filesystem::is_directory(sock_path.parent_path())) {
|
||||
if (!std::filesystem::is_directory(sock_path.parent_path())) {
|
||||
dout(1) << "ERROR: No such directory exist" << sock_dir << dendl;
|
||||
return;
|
||||
}
|
||||
for (const auto &entry :
|
||||
std::filesystem::directory_iterator(sock_dir)) {
|
||||
for (const auto &entry : std::filesystem::directory_iterator(sock_dir)) {
|
||||
if (entry.path().extension() == ".asok") {
|
||||
std::string daemon_socket_name = entry.path().filename().string();
|
||||
std::string daemon_name =
|
||||
daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
|
||||
daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
|
||||
if (clients.find(daemon_name) == clients.end() &&
|
||||
!(daemon_name.find("mgr") != std::string::npos) &&
|
||||
!(daemon_name.find("ceph-exporter") != std::string::npos)) {
|
||||
@ -329,7 +349,6 @@ void DaemonMetricCollector::update_sockets() {
|
||||
void OrderedMetricsBuilder::add(std::string value, std::string name,
|
||||
std::string description, std::string mtype,
|
||||
labels_t labels) {
|
||||
|
||||
if (metrics.find(name) == metrics.end()) {
|
||||
Metric metric(name, mtype, description);
|
||||
metrics[name] = std::move(metric);
|
||||
@ -348,7 +367,6 @@ std::string OrderedMetricsBuilder::dump() {
|
||||
void UnorderedMetricsBuilder::add(std::string value, std::string name,
|
||||
std::string description, std::string mtype,
|
||||
labels_t labels) {
|
||||
|
||||
Metric metric(name, mtype, description);
|
||||
metric.add(labels, value);
|
||||
out += metric.dump() + "\n\n";
|
||||
|
Loading…
Reference in New Issue
Block a user