mirror of
https://github.com/ceph/ceph
synced 2025-01-03 01:22:53 +00:00
mon: subtree-based crush type down health warnings
Signed-off-by: Neha Ojha <nojha@redhat.com>
This commit is contained in:
parent
d69aaccbc4
commit
72682e57ce
@ -587,6 +587,20 @@ int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string>
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Render the CRUSH location of an item as a single string.
 *
 * Collects the (type, name) pairs for the item through
 * get_full_location_ordered() and joins them as
 * "type1=name1,type2=name2,...". The pairs are emitted in the reverse of
 * the order in which get_full_location_ordered() stored them in the vector.
 *
 * @param id crush item id to describe (callers pass both bucket and osd ids)
 * @return the joined location string; empty when no pairs were collected
 */
string CrushWrapper::get_full_location_ordered_string(int id)
{
  vector<pair<string, string> > full_location_ordered;
  string full_location;

  // The status code is not inspected; whatever pairs were collected (possibly
  // none) are formatted.
  get_full_location_ordered(id, full_location_ordered);

  // Walk the vector back-to-front rather than reversing it in place, and
  // append with += so no temporary copy of the result is built per pair.
  for (auto i = full_location_ordered.rbegin();
       i != full_location_ordered.rend(); ++i) {
    // separator goes between pairs only, never before the first one
    if (i != full_location_ordered.rbegin()) {
      full_location += ",";
    }
    full_location += i->first;
    full_location += "=";
    full_location += i->second;
  }
  return full_location;
}
|
||||||
|
|
||||||
map<int, string> CrushWrapper::get_parent_hierarchy(int id)
|
map<int, string> CrushWrapper::get_parent_hierarchy(int id)
|
||||||
{
|
{
|
||||||
|
@ -590,6 +590,15 @@ public:
|
|||||||
*/
|
*/
|
||||||
int get_full_location_ordered(int id, vector<pair<string, string> >& path);
|
int get_full_location_ordered(int id, vector<pair<string, string> >& path);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* identical to get_full_location_ordered(int id, vector<pair<string, string> >& path),
|
||||||
|
* although it returns a concatenated string with the type/name pairs in descending
|
||||||
|
* hierarchical order with format key1=val1,key2=val2.
|
||||||
|
*
|
||||||
|
* returns the location in descending hierarchy as a string.
|
||||||
|
*/
|
||||||
|
string get_full_location_ordered_string(int id);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns (type_id, type) of all parent buckets between id and
|
* returns (type_id, type) of all parent buckets between id and
|
||||||
* default, can be used to check for anomalous CRUSH maps
|
* default, can be used to check for anomalous CRUSH maps
|
||||||
|
@ -3460,15 +3460,14 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
|
|||||||
} else {
|
} else {
|
||||||
int num_in_osds = 0;
|
int num_in_osds = 0;
|
||||||
int num_down_in_osds = 0;
|
int num_down_in_osds = 0;
|
||||||
int num_in_subtrees = 0;
|
|
||||||
int num_down_in_subtrees = 0;
|
|
||||||
set<int> osds;
|
set<int> osds;
|
||||||
set<int> down_cache; // quick cache of down subtrees
|
set<int> down_in_osds;
|
||||||
set<int> in_subtrees;
|
set<int> up_in_osds;
|
||||||
set<int> up_in_subtrees;
|
set<int> subtree_up;
|
||||||
set<int> down_in_subtrees;
|
unordered_map<int, set<int> > subtree_type_down;
|
||||||
set<string> down_in_subtree_names;
|
unordered_map<int, int> num_osds_subtree;
|
||||||
int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit);
|
int max_type = osdmap.crush->get_num_type_names() - 1;
|
||||||
|
|
||||||
for (int i = 0; i < osdmap.get_max_osd(); i++) {
|
for (int i = 0; i < osdmap.get_max_osd(); i++) {
|
||||||
if (!osdmap.exists(i)) {
|
if (!osdmap.exists(i)) {
|
||||||
if (osdmap.crush->item_exists(i)) {
|
if (osdmap.crush->item_exists(i)) {
|
||||||
@ -3479,57 +3478,97 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
|
|||||||
if (osdmap.is_out(i))
|
if (osdmap.is_out(i))
|
||||||
continue;
|
continue;
|
||||||
++num_in_osds;
|
++num_in_osds;
|
||||||
// get the id of the parent subtree
|
if (down_in_osds.count(i) || up_in_osds.count(i))
|
||||||
int subtree_id = osdmap.get_parent_subtree_id(g_ceph_context, i, type, &down_cache);
|
continue;
|
||||||
if (subtree_id != -ENOENT) {
|
|
||||||
in_subtrees.insert(subtree_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!osdmap.is_up(i)) {
|
if (!osdmap.is_up(i)) {
|
||||||
++num_down_in_osds;
|
down_in_osds.insert(i);
|
||||||
if (detail) {
|
int parent_id = 0;
|
||||||
const osd_info_t& info = osdmap.get_info(i);
|
int current = i;
|
||||||
ostringstream ss;
|
for (int type = 0; type <= max_type; type++) {
|
||||||
map<string, string> loc;
|
int r = osdmap.crush->get_immediate_parent_id(current, &parent_id);
|
||||||
loc = osdmap.crush->get_full_location(i);
|
if (r == -ENOENT)
|
||||||
ss << "osd." << i << loc << " is down since epoch " << info.down_at << ", last address "
|
break;
|
||||||
<< osdmap.get_addr(i);
|
// break early if this parent is already marked as up
|
||||||
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
|
if (subtree_up.count(parent_id))
|
||||||
|
break;
|
||||||
|
type = osdmap.crush->get_bucket_type(parent_id);
|
||||||
|
if (!osdmap.subtree_type_is_down(g_ceph_context, parent_id, type, &down_in_osds, &up_in_osds, &subtree_up, &subtree_type_down))
|
||||||
|
break;
|
||||||
|
current = parent_id;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// if an osd in a subtree is up, implies subtree is not down
|
|
||||||
up_in_subtrees.insert(subtree_id);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
set_difference(in_subtrees.begin(), in_subtrees.end(),
|
// calculate the number of down osds in each down subtree and store it in num_osds_subtree
|
||||||
up_in_subtrees.begin(), up_in_subtrees.end(),
|
for (int type = 1; type <= max_type; type++) {
|
||||||
inserter(down_in_subtrees, down_in_subtrees.end()));
|
for (auto j = subtree_type_down[type].begin(); j != subtree_type_down[type].end(); ++j) {
|
||||||
num_in_subtrees = in_subtrees.size();
|
if (type == 1) {
|
||||||
num_down_in_subtrees = down_in_subtrees.size();
|
list<int> children;
|
||||||
for (set<int>::iterator it = down_in_subtrees.begin();
|
int num = osdmap.crush->get_children(*j, &children);
|
||||||
it != down_in_subtrees.end(); ++it) {
|
num_osds_subtree[*j] = num;
|
||||||
down_in_subtree_names.insert(osdmap.crush->get_item_name(*it));
|
} else {
|
||||||
|
list<int> children;
|
||||||
|
int num = 0;
|
||||||
|
int num_children = osdmap.crush->get_children(*j, &children);
|
||||||
|
if (num_children == 0)
|
||||||
|
continue;
|
||||||
|
for (auto l = children.begin(); l != children.end(); ++l) {
|
||||||
|
if (num_osds_subtree[*l] > 0) {
|
||||||
|
num = num + num_osds_subtree[*l];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
num_osds_subtree[*j] = num;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
num_down_in_osds = down_in_osds.size();
|
||||||
assert(num_down_in_osds <= num_in_osds);
|
assert(num_down_in_osds <= num_in_osds);
|
||||||
assert(num_down_in_subtrees <= num_in_subtrees);
|
|
||||||
if (num_down_in_osds > 0) {
|
if (num_down_in_osds > 0) {
|
||||||
ostringstream ss;
|
ostringstream ss;
|
||||||
ss << num_down_in_osds << "/" << num_in_osds << " in osds are down";
|
ss << "\n";
|
||||||
|
// summary of down subtree types and osds
|
||||||
|
for (int type = max_type; type > 0; type--) {
|
||||||
|
if (subtree_type_down[type].size() > 0) {
|
||||||
|
ss << subtree_type_down[type].size() << " " << osdmap.crush->get_type_name(type);
|
||||||
|
if (subtree_type_down[type].size() > 1) {
|
||||||
|
ss << "s";
|
||||||
|
}
|
||||||
|
int sum_down_osds = 0;
|
||||||
|
for (auto j = subtree_type_down[type].begin(); j != subtree_type_down[type].end(); ++j) {
|
||||||
|
sum_down_osds = sum_down_osds + num_osds_subtree[*j];
|
||||||
|
}
|
||||||
|
ss << " (" << sum_down_osds << " osds) down\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ss << down_in_osds.size() << " osds are down\n";
|
||||||
summary.push_back(make_pair(HEALTH_WARN, ss.str()));
|
summary.push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||||
if (num_down_in_subtrees > 0) {
|
|
||||||
ostringstream sst;
|
if (detail) {
|
||||||
if (num_in_subtrees == 1) {
|
ostringstream ss;
|
||||||
sst << num_down_in_subtrees << "/" << num_in_subtrees << " " << g_conf->mon_osd_down_out_subtree_limit <<
|
// details of down subtree types
|
||||||
" is down";
|
for (int type = max_type; type > 0; type--) {
|
||||||
sst << "(" << down_in_subtree_names << ")";
|
for (auto j = subtree_type_down[type].rbegin(); j != subtree_type_down[type].rend(); ++j) {
|
||||||
summary.push_back(make_pair(HEALTH_WARN, sst.str()));
|
ss << osdmap.crush->get_type_name(type);
|
||||||
} else {
|
ss << " ";
|
||||||
sst << num_down_in_subtrees << "/" << num_in_subtrees << " " << g_conf->mon_osd_down_out_subtree_limit <<
|
ss << osdmap.crush->get_item_name(*j);
|
||||||
"s are down";
|
// at the top level, do not print location
|
||||||
sst << "(" << down_in_subtree_names << ")";
|
if (type != max_type) {
|
||||||
summary.push_back(make_pair(HEALTH_WARN, sst.str()));
|
ss << " (";
|
||||||
|
ss << osdmap.crush->get_full_location_ordered_string(*j);
|
||||||
|
ss << ")";
|
||||||
|
}
|
||||||
|
int num = num_osds_subtree[*j];
|
||||||
|
ss << " (" << num << " osds)";
|
||||||
|
ss << " is down\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// details of down osds
|
||||||
|
for (auto it = down_in_osds.begin(); it != down_in_osds.end(); ++it) {
|
||||||
|
ss << "osd." << *it << " (";
|
||||||
|
ss << osdmap.crush->get_full_location_ordered_string(*it);
|
||||||
|
ss << ") is down\n";
|
||||||
|
}
|
||||||
|
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,32 +293,36 @@ bool OSDMap::containing_subtree_is_down(CephContext *cct, int id, int subtree_ty
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int OSDMap::get_parent_subtree_id(CephContext *cct, int id, int subtree_type, set<int> *down_cache) const
|
bool OSDMap::subtree_type_is_down(CephContext *cct, int id, int subtree_type, set<int> *down_in_osds, set<int> *up_in_osds,
|
||||||
|
set<int> *subtree_up, unordered_map<int, set<int> > *subtree_type_down) const
|
||||||
{
|
{
|
||||||
set<int> local_down_cache;
|
if (id >= 0) {
|
||||||
if (!down_cache) {
|
bool is_down_ret = is_down(id);
|
||||||
down_cache = &local_down_cache;
|
if (is_down_ret) {
|
||||||
}
|
down_in_osds->insert(id);
|
||||||
|
|
||||||
int current = id;
|
|
||||||
while (true) {
|
|
||||||
int type;
|
|
||||||
if (current >= 0) {
|
|
||||||
type = 0;
|
|
||||||
} else {
|
} else {
|
||||||
type = crush->get_bucket_type(current);
|
up_in_osds->insert(id);
|
||||||
}
|
}
|
||||||
assert(type >= 0);
|
return is_down_ret;
|
||||||
|
}
|
||||||
|
|
||||||
if (type >= subtree_type) {
|
if (subtree_type_down &&
|
||||||
return current;
|
(*subtree_type_down)[subtree_type].count(id)) {
|
||||||
}
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
int r = crush->get_immediate_parent_id(current, ¤t);
|
list<int> children;
|
||||||
if (r < 0) {
|
crush->get_children(id, &children);
|
||||||
return -ENOENT;
|
for (const auto &child : children) {
|
||||||
|
if (!subtree_type_is_down(cct, child, crush->get_bucket_type(child), down_in_osds, up_in_osds, subtree_up, subtree_type_down)) {
|
||||||
|
subtree_up->insert(id);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (subtree_type_down) {
|
||||||
|
(*subtree_type_down)[subtree_type].insert(id);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OSDMap::Incremental::encode_client_old(bufferlist& bl) const
|
void OSDMap::Incremental::encode_client_old(bufferlist& bl) const
|
||||||
|
@ -495,10 +495,9 @@ public:
|
|||||||
bool subtree_is_down(int id, set<int> *down_cache) const;
|
bool subtree_is_down(int id, set<int> *down_cache) const;
|
||||||
bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
|
bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
|
||||||
|
|
||||||
/**
|
bool subtree_type_is_down(CephContext *cct, int id, int subtree_type, set<int> *down_in_osds, set<int> *up_in_osds,
|
||||||
* get the id of the parent subtree
|
set<int> *subtree_up, unordered_map<int, set<int> > *subtree_type_down) const;
|
||||||
*/
|
|
||||||
int get_parent_subtree_id(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
|
|
||||||
int identify_osd(const entity_addr_t& addr) const;
|
int identify_osd(const entity_addr_t& addr) const;
|
||||||
int identify_osd(const uuid_d& u) const;
|
int identify_osd(const uuid_d& u) const;
|
||||||
int identify_osd_on_all_channels(const entity_addr_t& addr) const;
|
int identify_osd_on_all_channels(const entity_addr_t& addr) const;
|
||||||
|
Loading…
Reference in New Issue
Block a user