Merge pull request #52191 from rhcs-dashboard/fix-daemon-labels-exporter

mgr/dashboard: empty grafana panels for performance of daemons 

Reviewed-by: Pegonzal <NOT@FOUND>
Reviewed-by: cloudbehl <NOT@FOUND>
Reviewed-by: Juan Miguel Olmo <jolmomar@redhat.com>
Reviewed-by: Pere Diaz Bou <pdiazbou@redhat.com>
This commit is contained in:
Nizamudeen A 2023-07-28 18:46:35 +05:30 committed by GitHub
commit 7b3a4f2b4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 69 additions and 11 deletions

View File

@ -7,6 +7,7 @@
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
@ -157,7 +158,6 @@ void DaemonMetricCollector::dump_asok_metrics() {
labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end());
counter_name = multisite_labels_and_name.second;
}
labels.insert({"ceph_daemon", quote(daemon_name)});
auto perf_values = counters_values.at(counter_name_init);
dump_asok_metric(counter_group, perf_values, counter_name, labels);
}
@ -291,6 +291,14 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
std::string new_metric_name;
labels_t labels;
new_metric_name = metric_name;
const std::string ceph_daemon_prefix = "ceph-";
const std::string ceph_client_prefix = "client.";
if (daemon_name.rfind(ceph_daemon_prefix, 0) == 0) {
daemon_name = daemon_name.substr(ceph_daemon_prefix.size());
}
if (daemon_name.rfind(ceph_client_prefix, 0) == 0) {
daemon_name = daemon_name.substr(ceph_client_prefix.size());
}
// In vstart cluster socket files for rgw are stored as radosgw.<instance_id>.asok
if (daemon_name.find("radosgw") != std::string::npos) {
std::size_t pos = daemon_name.find_last_of('.');
@ -298,11 +306,17 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
labels["instance_id"] = quote(tmp);
}
else if (daemon_name.find("rgw") != std::string::npos) {
std::string tmp = daemon_name.substr(16, std::string::npos);
std::string::size_type pos = tmp.find('.');
labels["instance_id"] = quote("rgw." + tmp.substr(0, pos));
// fetch the instance_id, e.g. "okbvtv" from daemon_name=rgw.foo.ceph-node-00.okbvtv
size_t pos = daemon_name.find_last_of(".");
std::string instance_id = "";
if (pos != std::string::npos) {
instance_id = daemon_name.substr(pos+1);
}
labels["instance_id"] = quote(instance_id);
} else {
labels.insert({"ceph_daemon", quote(daemon_name)});
}
else if (daemon_name.find("rbd-mirror") != std::string::npos) {
if (daemon_name.find("rbd-mirror") != std::string::npos) {
std::regex re(
"^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
")?)(.*)\\.(replay(?:_bytes|_latency)?)$");

View File

@ -34,6 +34,8 @@ class DaemonMetricCollector {
public:
void main();
std::string get_metrics();
std::pair<labels_t, std::string>
get_labels_and_metric_name(std::string daemon_name, std::string metric_name);
private:
std::map<std::string, AdminSocketClient> clients;
@ -47,8 +49,6 @@ private:
void dump_asok_metric(boost::json::object perf_info,
boost::json::value perf_values, std::string name,
labels_t labels);
std::pair<labels_t, std::string>
get_labels_and_metric_name(std::string daemon_name, std::string metric_name);
std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name);
void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);

View File

@ -38,6 +38,8 @@ cypress_run () {
cd ${CEPH_DEV_FOLDER}/src/pybind/mgr/dashboard/frontend
kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"'
# check if the prometheus daemon is running
# before starting the e2e tests
@ -53,7 +55,5 @@ kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-prometheus-api-
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-grafana-api-url https://192.168.100.100:3000"'
kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch apply node-exporter --placement 'count:2'"'
kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"'
cypress_run ["cypress/e2e/orchestrator/workflow/*.feature","cypress/e2e/orchestrator/workflow/*-spec.ts"]
cypress_run "cypress/e2e/orchestrator/grafana/*.feature"

View File

@ -13,6 +13,7 @@ from collections import namedtuple
from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand
from mgr_util import get_default_addr, profile_method, build_url
from orchestrator import OrchestratorClientMixin, raise_if_exception, NoOrchestrator
from rbd import RBD
from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable
@ -548,7 +549,7 @@ class MetricCollectionThread(threading.Thread):
self.event.set()
class Module(MgrModule):
class Module(MgrModule, OrchestratorClientMixin):
MODULE_OPTIONS = [
Option(
'server_addr',
@ -645,6 +646,8 @@ class Module(MgrModule):
_global_instance = self
self.metrics_thread = MetricCollectionThread(_global_instance)
self.health_history = HealthHistory(self)
self.modify_instance_id = self.get_orch_status() and self.get_module_option(
'exclude_perf_counters')
def _setup_static_metrics(self) -> Dict[str, Metric]:
metrics = {}
@ -861,6 +864,12 @@ class Module(MgrModule):
return metrics
def get_orch_status(self) -> bool:
    """
    Return the orchestrator availability flag.

    The value is the first element of ``self.available()``. If that call
    raises ``NoOrchestrator``, the exception is swallowed and ``False`` is
    returned instead.
    """
    try:
        # NOTE(review): available() is presumably inherited from
        # OrchestratorClientMixin and returns a sequence whose first item is
        # the availability boolean -- confirm against the orchestrator module.
        return self.available()[0]
    except NoOrchestrator:
        # No orchestrator backend configured: report "not available".
        return False
def get_server_addr(self) -> str:
"""
Return the current mgr server IP.
@ -1281,9 +1290,20 @@ class Module(MgrModule):
)
# Populate other servers metadata
# If orchestrator is available and ceph-exporter is running modify rgw instance id
# to match the one from exporter
if self.modify_instance_id:
daemons = raise_if_exception(self.list_daemons(daemon_type='rgw'))
for daemon in daemons:
self.metrics['rgw_metadata'].set(1,
('{}.{}'.format(str(daemon.daemon_type),
str(daemon.daemon_id)),
str(daemon.hostname),
str(daemon.version),
str(daemon.daemon_id).split(".")[2]))
for key, value in servers.items():
service_id, service_type = key
if service_type == 'rgw':
if service_type == 'rgw' and not self.modify_instance_id:
hostname, version, name = value
self.metrics['rgw_metadata'].set(
1,

View File

@ -1,6 +1,7 @@
add_executable(unittest_exporter
test_exporter.cc
"${CMAKE_SOURCE_DIR}/src/exporter/util.cc"
"${CMAKE_SOURCE_DIR}/src/exporter/DaemonMetricCollector.cc"
)
target_link_libraries(unittest_exporter

View File

@ -1,10 +1,13 @@
#include "gtest/gtest.h"
#include "exporter/util.h"
#include "exporter/DaemonMetricCollector.h"
#include <string>
#include <vector>
#include <utility>
typedef std::map<std::string, std::string> labels_t;
// 17.2.6's memento mori:
// This data was gathered from the python implementation of the promethize method
// where we transform the path of a counter to a valid prometheus name.
@ -662,3 +665,23 @@ TEST(Exporter, promethize) {
}
}
// Checks that DaemonMetricCollector::get_labels_and_metric_name() derives the
// expected label set and metric name from a daemon (admin socket) name and a
// counter name.
//
// Each case bundles the inputs with the expected outputs so they cannot drift
// apart, and the table is a plain (non-static) local so that running the test
// more than once in the same process does not keep appending entries.
TEST(Exporter, check_labels_and_metric_name) {
  struct LabelCase {
    std::string daemon_name;      // input: daemon name as passed by the caller
    std::string counter_name;     // input: metric/counter name
    labels_t expected_labels;     // expected labels (values are already quoted)
    std::string expected_metric;  // expected metric name in the result
  };

  const std::vector<LabelCase> cases = {
    {"ceph-osd.0", "ceph_osd_numpg",
     labels_t{{"ceph_daemon", "\"osd.0\""}}, "ceph_osd_numpg"},
    {"ceph-client.rgw.foo.ceph-node-00.okbvtv", "ceph_rgw_get",
     labels_t{{"instance_id", "\"okbvtv\""}}, "ceph_rgw_get"},
  };

  for (const auto &test_case : cases) {
    // Make a failure report say which daemon name was being checked.
    SCOPED_TRACE("daemon_name=" + test_case.daemon_name);
    DaemonMetricCollector &collector = collector_instance();
    const std::pair<labels_t, std::string> result =
      collector.get_labels_and_metric_name(test_case.daemon_name,
                                           test_case.counter_name);
    ASSERT_EQ(result.first, test_case.expected_labels);
    ASSERT_EQ(result.second, test_case.expected_metric);
  }
}