mon: Improvements to slow heartbeat health messages

Include crush parentage for each osd

Fixes: https://tracker.ceph.com/issues/43126

Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
David Zafman 2019-12-18 21:20:25 -08:00
parent 98f5ce79a6
commit 886475b5fe
4 changed files with 61 additions and 14 deletions

View File

@ -171,18 +171,18 @@ By default we will warn about ping times which exceed 1 second (1000 millisecond
::
HEALTH_WARN Long heartbeat ping times on back interface seen, longest is 1118.001 msec
HEALTH_WARN Slow OSD heartbeats on back (longest 1118.001ms)
The health detail will add the combination of OSDs are seeing the delays and by how much. There is a limit of 10
detail line items.
::
[WRN] OSD_SLOW_PING_TIME_BACK: Long heartbeat ping times on back interface seen, longest is 1118.001 msec
Slow heartbeat ping on back interface from osd.0 to osd.1 1118.001 msec
Slow heartbeat ping on back interface from osd.0 to osd.2 1030.123 msec
Slow heartbeat ping on back interface from osd.2 to osd.1 1015.321 msec
Slow heartbeat ping on back interface from osd.1 to osd.0 1010.456 msec
[WRN] OSD_SLOW_PING_TIME_BACK: Slow OSD heartbeats on back (longest 1118.001ms)
Slow OSD heartbeats on back from osd.0 [dc1,rack1] to osd.1 [dc1,rack1] 1118.001 msec possibly improving
Slow OSD heartbeats on back from osd.0 [dc1,rack1] to osd.2 [dc1,rack2] 1030.123 msec
Slow OSD heartbeats on back from osd.2 [dc1,rack2] to osd.1 [dc1,rack1] 1015.321 msec
Slow OSD heartbeats on back from osd.1 [dc1,rack1] to osd.0 [dc1,rack1] 1010.456 msec
To see even more detail and a complete dump of network performance information the ``dump_osd_network`` command can be used. Typically, this would be
sent to a mgr, but it can be limited to a particular OSD's interactions by issuing it to any OSD. The current threshold which defaults to 1 second

View File

@ -119,8 +119,30 @@ function TEST_network_ping_test2() {
run_osd $dir 2 || return 1
sleep 5
ceph osd crush add-bucket dc1 datacenter
ceph osd crush add-bucket dc2 datacenter
ceph osd crush add-bucket dc3 datacenter
ceph osd crush add-bucket rack1 rack
ceph osd crush add-bucket rack2 rack
ceph osd crush add-bucket rack3 rack
ceph osd crush add-bucket host1 host
ceph osd crush add-bucket host2 host
ceph osd crush add-bucket host3 host
ceph osd crush move dc1 root=default
ceph osd crush move dc2 root=default
ceph osd crush move dc3 root=default
ceph osd crush move rack1 datacenter=dc1
ceph osd crush move rack2 datacenter=dc2
ceph osd crush move rack3 datacenter=dc3
ceph osd crush move host1 rack=rack1
ceph osd crush move host2 rack=rack2
ceph osd crush move host3 rack=rack3
ceph osd crush set osd.0 1.0 host=host1
ceph osd crush set osd.1 1.0 host=host2
ceph osd crush set osd.2 1.0 host=host3
ceph osd crush rule create-simple myrule default host firstn
create_pool foo 16
create_pool foo 16 16 replicated myrule
# write some objects
timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
@ -130,11 +152,13 @@ function TEST_network_ping_test2() {
flush_pg_stats
ceph health | tee $dir/health
grep -q "Long heartbeat" $dir/health || return 1
grep -q "Slow OSD heartbeats" $dir/health || return 1
ceph health detail | tee $dir/health
grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1
grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1
grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \
to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1
rm -f $dir/health
}

View File

@ -2363,6 +2363,23 @@ void PGMap::dump_pool_stats_and_io_rate(int64_t poolid, const OSDMap &osd_map,
}
}
// Get crush parentage for an osd (skip root)
set<std::string> PGMap::osd_parentage(const OSDMap& osdmap, int id) const
{
set<std::string> reporters_by_subtree;
auto reporter_subtree_level = g_conf().get_val<string>("mon_osd_reporter_subtree_level");
auto loc = osdmap.crush->get_full_location(id);
for (auto& [parent_bucket_type, parent_id] : loc) {
// Should we show the root? Might not be too informative like "default"
if (parent_bucket_type != "root" &&
parent_bucket_type != reporter_subtree_level) {
reporters_by_subtree.insert(parent_id);
}
}
return reporters_by_subtree;
}
void PGMap::get_health_checks(
CephContext *cct,
const OSDMap& osdmap,
@ -2833,9 +2850,11 @@ void PGMap::get_health_checks(
break;
}
max_detail--;
ss << "Slow heartbeat ping on back interface from osd." << sback.from
ss << "Slow OSD heartbeats on back from osd." << sback.from
<< " [" << osd_parentage(osdmap, sback.from) << "]"
<< (osdmap.is_down(sback.from) ? " (down)" : "")
<< " to osd." << sback.to
<< " [" << osd_parentage(osdmap, sback.to) << "]"
<< (osdmap.is_down(sback.to) ? " (down)" : "")
<< " " << fixed_u_to_string(sback.pingtime, 3) << " msec"
<< (sback.improving ? " possibly improving" : "");
@ -2850,9 +2869,12 @@ void PGMap::get_health_checks(
break;
}
max_detail--;
ss << "Slow heartbeat ping on front interface from osd." << sfront.from
// Get crush parentage for each osd
ss << "Slow OSD heartbeats on front from osd." << sfront.from
<< " [" << osd_parentage(osdmap, sfront.from) << "]"
<< (osdmap.is_down(sfront.from) ? " (down)" : "")
<< " to osd." << sfront.to
<< " [" << osd_parentage(osdmap, sfront.to) << "]"
<< (osdmap.is_down(sfront.to) ? " (down)" : "")
<< " " << fixed_u_to_string(sfront.pingtime, 3) << " msec"
<< (sfront.improving ? " possibly improving" : "");
@ -2860,16 +2882,16 @@ void PGMap::get_health_checks(
}
if (detail_back.size() != 0) {
ostringstream ss;
ss << "Long heartbeat ping times on back interface seen, longest is "
<< fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << " msec";
ss << "Slow OSD heartbeats on back (longest "
<< fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << "ms)";
auto& d = checks->add("OSD_SLOW_PING_TIME_BACK", HEALTH_WARN, ss.str(),
back_sorted.size());
d.detail.swap(detail_back);
}
if (detail_front.size() != 0) {
ostringstream ss;
ss << "Long heartbeat ping times on front interface seen, longest is "
<< fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << " msec";
ss << "Slow OSD heartbeats on front (longest "
<< fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << "ms)";
auto& d = checks->add("OSD_SLOW_PING_TIME_FRONT", HEALTH_WARN, ss.str(),
front_sorted.size());
d.detail.swap(detail_front);

View File

@ -491,6 +491,7 @@ public:
void get_filtered_pg_stats(uint64_t state, int64_t poolid, int64_t osdid,
bool primary, std::set<pg_t>& pgs) const;
set<std::string> osd_parentage(const OSDMap& osdmap, int id) const;
void get_health_checks(
CephContext *cct,
const OSDMap& osdmap,