mirror of
https://github.com/ceph/ceph
synced 2025-02-22 02:27:29 +00:00
exporter: handle exceptions gracefully
The ceph exporter crashes because it fails to handle exceptions in dump_asok_metrics(). Add try and catch blocks to handle the exceptions gracefully. Signed-off-by: Divyansh Kamboj <dkamboj@redhat.com>
This commit is contained in:
parent
af30430fc5
commit
e442ee4799
@ -84,6 +84,66 @@ std::string boost_string_to_std(boost::json::string js) {
|
||||
|
||||
// Returns `value` wrapped in a pair of double-quote characters.
// The content of `value` is copied verbatim; nothing inside it is escaped.
std::string quote(std::string value) {
  std::string quoted;
  quoted.reserve(value.size() + 2);
  quoted += '"';
  quoted += value;
  quoted += '"';
  return quoted;
}
|
||||
|
||||
void DaemonMetricCollector::parse_asok_metrics(
|
||||
std::string &counter_dump_response, std::string &counter_schema_response,
|
||||
int64_t prio_limit, const std::string &daemon_name) {
|
||||
json_object counter_dump =
|
||||
boost::json::parse(counter_dump_response).as_object();
|
||||
json_object counter_schema =
|
||||
boost::json::parse(counter_schema_response).as_object();
|
||||
|
||||
for (auto &perf_group_item : counter_schema) {
|
||||
std::string perf_group = {perf_group_item.key().begin(),
|
||||
perf_group_item.key().end()};
|
||||
json_array perf_group_schema_array = perf_group_item.value().as_array();
|
||||
json_array perf_group_dump_array = counter_dump[perf_group].as_array();
|
||||
for (auto schema_itr = perf_group_schema_array.begin(),
|
||||
dump_itr = perf_group_dump_array.begin();
|
||||
schema_itr != perf_group_schema_array.end() &&
|
||||
dump_itr != perf_group_dump_array.end();
|
||||
++schema_itr, ++dump_itr) {
|
||||
auto counters = schema_itr->at("counters").as_object();
|
||||
auto counters_labels = schema_itr->at("labels").as_object();
|
||||
auto counters_values = dump_itr->at("counters").as_object();
|
||||
labels_t labels;
|
||||
|
||||
for (auto &label : counters_labels) {
|
||||
std::string label_key = {label.key().begin(), label.key().end()};
|
||||
labels[label_key] = quote(label.value().as_string().c_str());
|
||||
}
|
||||
for (auto &counter : counters) {
|
||||
json_object counter_group = counter.value().as_object();
|
||||
if (counter_group["priority"].as_int64() < prio_limit) {
|
||||
continue;
|
||||
}
|
||||
std::string counter_name_init = {counter.key().begin(),
|
||||
counter.key().end()};
|
||||
std::string counter_name = perf_group + "_" + counter_name_init;
|
||||
promethize(counter_name);
|
||||
|
||||
auto extra_labels = get_extra_labels(daemon_name);
|
||||
if (extra_labels.empty()) {
|
||||
dout(1) << "Unable to parse instance_id from daemon_name: "
|
||||
<< daemon_name << dendl;
|
||||
continue;
|
||||
}
|
||||
labels.insert(extra_labels.begin(), extra_labels.end());
|
||||
|
||||
// For now this is only required for rgw multi-site metrics
|
||||
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
|
||||
if (!multisite_labels_and_name.first.empty()) {
|
||||
labels.insert(multisite_labels_and_name.first.begin(),
|
||||
multisite_labels_and_name.first.end());
|
||||
counter_name = multisite_labels_and_name.second;
|
||||
}
|
||||
auto perf_values = counters_values.at(counter_name_init);
|
||||
dump_asok_metric(counter_group, perf_values, counter_name, labels);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter_prio,
|
||||
bool sockClientsPing, std::string &dump_response,
|
||||
std::string &schema_response,
|
||||
@ -125,71 +185,36 @@ void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter
|
||||
continue;
|
||||
}
|
||||
|
||||
json_object counter_dump = boost::json::parse(counter_dump_response).as_object();
|
||||
json_object counter_schema = boost::json::parse(counter_schema_response).as_object();
|
||||
|
||||
for (auto &perf_group_item : counter_schema) {
|
||||
std::string perf_group = {perf_group_item.key().begin(),
|
||||
perf_group_item.key().end()};
|
||||
json_array perf_group_schema_array = perf_group_item.value().as_array();
|
||||
json_array perf_group_dump_array = counter_dump[perf_group].as_array();
|
||||
for (auto schema_itr = perf_group_schema_array.begin(),
|
||||
dump_itr = perf_group_dump_array.begin();
|
||||
schema_itr != perf_group_schema_array.end() &&
|
||||
dump_itr != perf_group_dump_array.end();
|
||||
++schema_itr, ++dump_itr) {
|
||||
auto counters = schema_itr->at("counters").as_object();
|
||||
auto counters_labels = schema_itr->at("labels").as_object();
|
||||
auto counters_values = dump_itr->at("counters").as_object();
|
||||
labels_t labels;
|
||||
|
||||
for (auto &label: counters_labels) {
|
||||
std::string label_key = {label.key().begin(), label.key().end()};
|
||||
labels[label_key] = quote(label.value().as_string().c_str());
|
||||
}
|
||||
for (auto &counter : counters) {
|
||||
json_object counter_group = counter.value().as_object();
|
||||
if (counter_group["priority"].as_int64() < prio_limit) {
|
||||
continue;
|
||||
}
|
||||
std::string counter_name_init = {counter.key().begin(), counter.key().end()};
|
||||
std::string counter_name = perf_group + "_" + counter_name_init;
|
||||
promethize(counter_name);
|
||||
|
||||
auto extra_labels = get_extra_labels(daemon_name);
|
||||
if (extra_labels.empty()) {
|
||||
dout(1) << "Unable to parse instance_id from daemon_name: " << daemon_name << dendl;
|
||||
continue;
|
||||
}
|
||||
labels.insert(extra_labels.begin(), extra_labels.end());
|
||||
|
||||
// For now this is only required for rgw multi-site metrics
|
||||
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
|
||||
if (!multisite_labels_and_name.first.empty()) {
|
||||
labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end());
|
||||
counter_name = multisite_labels_and_name.second;
|
||||
}
|
||||
auto perf_values = counters_values.at(counter_name_init);
|
||||
dump_asok_metric(counter_group, perf_values, counter_name, labels);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string config_show = !config_show_response ? "" :
|
||||
try {
|
||||
std::string config_show = !config_show_response ? "" :
|
||||
asok_request(sock_client, "config show", daemon_name);
|
||||
if (config_show.size() == 0) {
|
||||
if (config_show.size() == 0) {
|
||||
failures++;
|
||||
continue;
|
||||
}
|
||||
json_object pid_file_json = boost::json::parse(config_show).as_object();
|
||||
std::string pid_path =
|
||||
boost_string_to_std(pid_file_json["pid_file"].as_string());
|
||||
std::string pid_str = read_file_to_string(pid_path);
|
||||
if (!pid_path.size()) {
|
||||
dout(1) << "pid path is empty; process metrics won't be fetched for: "
|
||||
<< daemon_name << dendl;
|
||||
}
|
||||
if (!pid_str.empty()) {
|
||||
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
|
||||
}
|
||||
parse_asok_metrics(counter_dump_response, counter_schema_response,
|
||||
prio_limit, daemon_name);
|
||||
} catch (const std::invalid_argument &e) {
|
||||
failures++;
|
||||
dout(1) << "failed to handle " << daemon_name << ": " << e.what()
|
||||
<< dendl;
|
||||
continue;
|
||||
} catch (const std::runtime_error &e) {
|
||||
failures++;
|
||||
dout(1) << "failed to parse json for " << daemon_name << ": " << e.what()
|
||||
<< dendl;
|
||||
continue;
|
||||
}
|
||||
json_object pid_file_json = boost::json::parse(config_show).as_object();
|
||||
std::string pid_path =
|
||||
boost_string_to_std(pid_file_json["pid_file"].as_string());
|
||||
std::string pid_str = read_file_to_string(pid_path);
|
||||
if (!pid_path.size()) {
|
||||
dout(1) << "pid path is empty; process metrics won't be fetched for: "
|
||||
<< daemon_name << dendl;
|
||||
}
|
||||
if (!pid_str.empty()) {
|
||||
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
|
||||
}
|
||||
}
|
||||
dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
|
||||
|
@ -52,6 +52,9 @@ private:
|
||||
// Handles a single counter. As called from parse_asok_metrics():
//   perf_info   - the counter's schema object (it carries the "priority" field)
//   perf_values - the counter's value taken from the counter dump
//   name        - the metric name
//   labels      - the label set accumulated for the counter
// NOTE(review): what is emitted, and where, is defined in the implementation,
// which is not visible here.
void dump_asok_metric(boost::json::object perf_info,
|
||||
boost::json::value perf_values, std::string name,
|
||||
labels_t labels);
|
||||
// Parses the counter dump and counter schema JSON responses of one daemon and
// passes each counter whose schema priority is not below prio_limit to
// dump_asok_metric(). It catches nothing itself; dump_asok_metrics() calls it
// inside a try block that handles std::invalid_argument and
// std::runtime_error.
void parse_asok_metrics(std::string &counter_dump_response,
|
||||
std::string &counter_schema_response,
|
||||
int64_t prio_limit, const std::string &daemon_name);
|
||||
// Takes (daemon name, pid) pairs by value; dump_asok_metrics() builds such
// pairs from each daemon's pid file.
// NOTE(review): presumably gathers per-process metrics for those pids --
// confirm against the implementation.
void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
|
||||
// Issues `command` (e.g. "config show") through the given admin socket client
// and returns the response as a string. dump_asok_metrics() treats an empty
// response as a failure for that daemon.
std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user