From 29e0b526a2c1807fb898bef1c9f1524e889a80ec Mon Sep 17 00:00:00 2001 From: Mingxin Liu Date: Fri, 13 Jan 2017 15:12:34 +0800 Subject: [PATCH 1/2] OSDMonitor: dropping the report messages from all other reporters is not reasonable if the osd eventually fails; this may prevent those reporters from receiving the latest update right away. Besides, it is inefficient to traverse all reporters Signed-off-by: Mingxin Liu --- src/mon/OSDMonitor.cc | 10 +++------- src/mon/OSDMonitor.h | 8 ++++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 6194c5ba332..aa27215ed1c 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1789,13 +1789,9 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) << m->get_orig_source_inst() << "\n"; if (failure_info.count(target_osd)) { failure_info_t& fi = failure_info[target_osd]; - list ls; - fi.take_report_messages(ls); - fi.cancel_report(reporter); - while (!ls.empty()) { - if (ls.front()) - mon->no_reply(ls.front()); - ls.pop_front(); + MonOpRequestRef report_op = fi.cancel_report(reporter); + if (report_op) { + mon->no_reply(report_op); } if (fi.reporters.empty()) { dout(10) << " removing last failure_info for osd." 
<< target_osd diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 612870ec72c..e5e366684b8 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -99,13 +99,13 @@ struct failure_info_t { } } - void cancel_report(int who) { + MonOpRequestRef cancel_report(int who) { map::iterator p = reporters.find(who); if (p == reporters.end()) - return; + return MonOpRequestRef(); + MonOpRequestRef ret = p->second.op; reporters.erase(p); - if (reporters.empty()) - max_failed_since = utime_t(); + return ret; } }; From 0ec21a5a8ab9aed8b728c98fb8cb9628bb4672a2 Mon Sep 17 00:00:00 2001 From: Mingxin Liu Date: Mon, 16 Jan 2017 11:54:34 +0800 Subject: [PATCH 2/2] OSDMonitor: calculate failure time only when osd reported failed Signed-off-by: Mingxin Liu --- src/mon/OSDMonitor.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index aa27215ed1c..9ae2cf18c14 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1759,13 +1759,12 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) assert(osdmap.is_up(target_osd)); assert(osdmap.get_addr(target_osd) == m->get_target().addr); - // calculate failure time - utime_t now = ceph_clock_now(); - utime_t failed_since = - m->get_recv_stamp() - - utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0); - if (m->if_osd_failed()) { + // calculate failure time + utime_t now = ceph_clock_now(); + utime_t failed_since = + m->get_recv_stamp() - utime_t(m->failed_for, 0); + // add a report if (m->is_immediate()) { mon->clog->debug() << m->get_target() << " reported immediately failed by "