Merge PR #23512 into master

* refs/pull/23512/head:
	osd: vary tick interval +/- 5% to avoid scrub livelocks
	osd: tick at OSD_TICK_INTERVAL, not heartbeat interval

Reviewed-by: Brad Hubbard <bhubbard@redhat.com>
Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
Sage Weil 2018-08-13 13:00:46 -05:00
commit 211cd6388b
2 changed files with 20 additions and 9 deletions

View File

@ -36,6 +36,7 @@
#include "include/types.h" #include "include/types.h"
#include "include/compat.h" #include "include/compat.h"
#include "include/random.h"
#include "OSD.h" #include "OSD.h"
#include "OSDMap.h" #include "OSDMap.h"
@ -169,8 +170,6 @@
#define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) #define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch())
const double OSD::OSD_TICK_INTERVAL = 1.0;
static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) { static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) {
return *_dout << "osd." << whoami << " " << epoch << " "; return *_dout << "osd." << whoami << " " << epoch << " ";
} }
@ -603,8 +602,8 @@ void OSDService::promote_throttle_recalibrate()
promote_probability_millis = prob; promote_probability_millis = prob;
// set hard limits for this interval to mitigate stampedes // set hard limits for this interval to mitigate stampedes
promote_max_objects = target_obj_sec * OSD::OSD_TICK_INTERVAL * 2; promote_max_objects = target_obj_sec * osd->OSD_TICK_INTERVAL * 2;
promote_max_bytes = target_bytes_sec * OSD::OSD_TICK_INTERVAL * 2; promote_max_bytes = target_bytes_sec * osd->OSD_TICK_INTERVAL * 2;
} }
// ------------------------------------- // -------------------------------------
@ -1978,6 +1977,14 @@ OSD::~OSD()
delete store; delete store;
} }
// Compute the delay to use when scheduling the next tick event: the base
// OSD_TICK_INTERVAL multiplied by a randomly chosen factor close to 1.0.
double OSD::get_tick_interval() const
{
  // Jitter the interval by +/- 5% so that tick timers do not stay in
  // lock-step; this avoids scrub scheduling livelocks.
  constexpr auto jitter = 0.05;
  const auto scale =
    ceph::util::generate_random_number(1.0 - jitter, 1.0 + jitter);
  return OSD_TICK_INTERVAL * scale;
}
void cls_initialize(ClassHandler *ch); void cls_initialize(ClassHandler *ch);
void OSD::handle_signal(int signum) void OSD::handle_signal(int signum)
@ -2606,10 +2613,12 @@ int OSD::init()
heartbeat_thread.create("osd_srv_heartbt"); heartbeat_thread.create("osd_srv_heartbt");
// tick // tick
tick_timer.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick(this)); tick_timer.add_event_after(get_tick_interval(),
new C_Tick(this));
{ {
Mutex::Locker l(tick_timer_lock); Mutex::Locker l(tick_timer_lock);
tick_timer_without_osd_lock.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick_WithoutOSDLock(this)); tick_timer_without_osd_lock.add_event_after(get_tick_interval(),
new C_Tick_WithoutOSDLock(this));
} }
osd_lock.Unlock(); osd_lock.Unlock();
@ -4813,7 +4822,7 @@ void OSD::tick()
do_waiters(); do_waiters();
tick_timer.add_event_after(OSD_TICK_INTERVAL, new C_Tick(this)); tick_timer.add_event_after(get_tick_interval(), new C_Tick(this));
} }
void OSD::tick_without_osd_lock() void OSD::tick_without_osd_lock()
@ -4884,7 +4893,8 @@ void OSD::tick_without_osd_lock()
mgrc.update_daemon_health(get_health_metrics()); mgrc.update_daemon_health(get_health_metrics());
service.kick_recovery_queue(); service.kick_recovery_queue();
tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, new C_Tick_WithoutOSDLock(this)); tick_timer_without_osd_lock.add_event_after(get_tick_interval(),
new C_Tick_WithoutOSDLock(this));
} }
// Usage: // Usage:

View File

@ -1237,7 +1237,8 @@ public:
protected: protected:
static const double OSD_TICK_INTERVAL; // tick interval for tick_timer and tick_timer_without_osd_lock const double OSD_TICK_INTERVAL = { 1.0 };
double get_tick_interval() const;
AuthAuthorizeHandlerRegistry *authorize_handler_cluster_registry; AuthAuthorizeHandlerRegistry *authorize_handler_cluster_registry;
AuthAuthorizeHandlerRegistry *authorize_handler_service_registry; AuthAuthorizeHandlerRegistry *authorize_handler_service_registry;