From 7d76458354661f7575c4a2cae251a9b828513580 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Aug 2018 08:22:05 -0500 Subject: [PATCH 1/2] osd: tick at OSD_TICK_INTERVAL, not heartbeat interval Heartbeat inveral is not related! Signed-off-by: Sage Weil --- src/osd/OSD.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 86e06dd5619..db51ad153a3 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2606,10 +2606,12 @@ int OSD::init() heartbeat_thread.create("osd_srv_heartbt"); // tick - tick_timer.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick(this)); + tick_timer.add_event_after(OSD_TICK_INTERVAL, + new C_Tick(this)); { Mutex::Locker l(tick_timer_lock); - tick_timer_without_osd_lock.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick_WithoutOSDLock(this)); + tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, + new C_Tick_WithoutOSDLock(this)); } osd_lock.Unlock(); @@ -4875,7 +4877,8 @@ void OSD::tick_without_osd_lock() mgrc.update_daemon_health(get_health_metrics()); service.kick_recovery_queue(); - tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, new C_Tick_WithoutOSDLock(this)); + tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, + new C_Tick_WithoutOSDLock(this)); } // Usage: From 2011377c379c9d53a3a0a693a7874fc330278898 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Aug 2018 08:33:42 -0500 Subject: [PATCH 2/2] osd: vary tick interval +/- 5% to avoid scrub livelocks If you have two pgs that need to scrub on two OSDs, each the primary for one pg and the replica for the other, you can end up in a livelock: - both osds locally reserve a scrub slot - both osds send a scrub schedule request - both scrub requests are rejected - both osds wait exactly 1 second - repeat Seems a bit unlikely, but I've seen test cases where it goes on more an hour. Fixes: http://tracker.ceph.com/issues/26890 Signed-off-by: Sage Weil --- src/osd/OSD.cc | 23 +++++++++++++++-------- src/osd/OSD.h | 3 ++- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index db51ad153a3..6831fc70271 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -36,6 +36,7 @@ #include "include/types.h" #include "include/compat.h" +#include "include/random.h" #include "OSD.h" #include "OSDMap.h" @@ -169,8 +170,6 @@ #define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) -const double OSD::OSD_TICK_INTERVAL = 1.0; - static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) { return *_dout << "osd." << whoami << " " << epoch << " "; } @@ -603,8 +602,8 @@ void OSDService::promote_throttle_recalibrate() promote_probability_millis = prob; // set hard limits for this interval to mitigate stampedes - promote_max_objects = target_obj_sec * OSD::OSD_TICK_INTERVAL * 2; - promote_max_bytes = target_bytes_sec * OSD::OSD_TICK_INTERVAL * 2; + promote_max_objects = target_obj_sec * osd->OSD_TICK_INTERVAL * 2; + promote_max_bytes = target_bytes_sec * osd->OSD_TICK_INTERVAL * 2; } // ------------------------------------- @@ -1978,6 +1977,14 @@ OSD::~OSD() delete store; } +double OSD::get_tick_interval() const +{ + // vary +/- 5% to avoid scrub scheduling livelocks + constexpr auto delta = 0.05; + return (OSD_TICK_INTERVAL * + ceph::util::generate_random_number(1.0 - delta, 1.0 + delta)); +} + void cls_initialize(ClassHandler *ch); void OSD::handle_signal(int signum) @@ -2606,11 +2613,11 @@ int OSD::init() heartbeat_thread.create("osd_srv_heartbt"); // tick - tick_timer.add_event_after(OSD_TICK_INTERVAL, + tick_timer.add_event_after(get_tick_interval(), new C_Tick(this)); { Mutex::Locker l(tick_timer_lock); - tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, + tick_timer_without_osd_lock.add_event_after(get_tick_interval(), new C_Tick_WithoutOSDLock(this)); } @@ -4806,7 +4813,7 @@ void OSD::tick() do_waiters(); - tick_timer.add_event_after(OSD_TICK_INTERVAL, new C_Tick(this)); + tick_timer.add_event_after(get_tick_interval(), new C_Tick(this)); } void OSD::tick_without_osd_lock() @@ -4877,7 +4884,7 @@ void OSD::tick_without_osd_lock() mgrc.update_daemon_health(get_health_metrics()); service.kick_recovery_queue(); - tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, + tick_timer_without_osd_lock.add_event_after(get_tick_interval(), new C_Tick_WithoutOSDLock(this)); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index e435b99000a..18739f52dfa 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1237,7 +1237,8 @@ public: protected: - static const double OSD_TICK_INTERVAL; // tick interval for tick_timer and tick_timer_without_osd_lock + const double OSD_TICK_INTERVAL = { 1.0 }; + double get_tick_interval() const; AuthAuthorizeHandlerRegistry *authorize_handler_cluster_registry; AuthAuthorizeHandlerRegistry *authorize_handler_service_registry;