mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
Merge pull request #6550 from cernceph/wip-deepscrub
osd: randomize deep scrubbing

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
commit
bc7d677185
@@ -686,6 +686,7 @@ OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops
|
||||
OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether auto-repair inconsistencies upon deep-scrubbing
|
||||
OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when number of errors is below this threshold
|
||||
OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
|
||||
OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)
|
||||
OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
|
||||
OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub
|
||||
OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100)
|
||||
|
@@ -1809,6 +1809,15 @@ int OSD::init()
|
||||
|
||||
dout(2) << "boot" << dendl;
|
||||
|
||||
// initialize the daily loadavg with current 15min loadavg
|
||||
double loadavgs[3];
|
||||
if (getloadavg(loadavgs, 3) == 3) {
|
||||
daily_loadavg = loadavgs[2];
|
||||
} else {
|
||||
derr << "OSD::init() : couldn't read loadavgs\n" << dendl;
|
||||
daily_loadavg = 1.0;
|
||||
}
|
||||
|
||||
// read superblock
|
||||
r = read_superblock();
|
||||
if (r < 0) {
|
||||
@@ -3862,8 +3871,12 @@ void OSD::heartbeat()
|
||||
|
||||
// get CPU load avg
|
||||
double loadavgs[1];
|
||||
if (getloadavg(loadavgs, 1) == 1)
|
||||
int n_samples = 86400 / cct->_conf->osd_heartbeat_interval;
|
||||
if (getloadavg(loadavgs, 1) == 1) {
|
||||
logger->set(l_osd_loadavg, 100 * loadavgs[0]);
|
||||
daily_loadavg = (daily_loadavg * (n_samples - 1) + loadavgs[0]) / n_samples;
|
||||
dout(30) << "heartbeat: daily_loadavg " << daily_loadavg << dendl;
|
||||
}
|
||||
|
||||
dout(30) << "heartbeat checking stats" << dendl;
|
||||
|
||||
@@ -6065,23 +6078,35 @@ bool OSD::scrub_time_permit(utime_t now)
|
||||
|
||||
bool OSD::scrub_load_below_threshold()
|
||||
{
|
||||
double loadavgs[1];
|
||||
if (getloadavg(loadavgs, 1) != 1) {
|
||||
double loadavgs[3];
|
||||
if (getloadavg(loadavgs, 3) != 3) {
|
||||
dout(10) << __func__ << " couldn't read loadavgs\n" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (loadavgs[0] >= cct->_conf->osd_scrub_load_threshold) {
|
||||
dout(20) << __func__ << " loadavg " << loadavgs[0]
|
||||
<< " >= max " << cct->_conf->osd_scrub_load_threshold
|
||||
<< " = no, load too high" << dendl;
|
||||
return false;
|
||||
} else {
|
||||
// allow scrub if below configured threshold
|
||||
if (loadavgs[0] < cct->_conf->osd_scrub_load_threshold) {
|
||||
dout(20) << __func__ << " loadavg " << loadavgs[0]
|
||||
<< " < max " << cct->_conf->osd_scrub_load_threshold
|
||||
<< " = yes" << dendl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// allow scrub if below daily avg and currently decreasing
|
||||
if (loadavgs[0] < daily_loadavg && loadavgs[0] < loadavgs[2]) {
|
||||
dout(20) << __func__ << " loadavg " << loadavgs[0]
|
||||
<< " < daily_loadavg " << daily_loadavg
|
||||
<< " and < 15m avg " << loadavgs[2]
|
||||
<< " = yes" << dendl;
|
||||
return true;
|
||||
}
|
||||
|
||||
dout(20) << __func__ << " loadavg " << loadavgs[0]
|
||||
<< " >= max " << cct->_conf->osd_scrub_load_threshold
|
||||
<< " and ( >= daily_loadavg " << daily_loadavg
|
||||
<< " or >= 15m avg " << loadavgs[2]
|
||||
<< ") = no" << dendl;
|
||||
return false;
|
||||
}
|
||||
|
||||
void OSD::sched_scrub()
|
||||
|
@@ -1505,6 +1505,7 @@ private:
|
||||
Messenger *hb_front_server_messenger;
|
||||
Messenger *hb_back_server_messenger;
|
||||
utime_t last_heartbeat_resample; ///< last time we chose random peers in waiting-for-healthy state
|
||||
double daily_loadavg;
|
||||
|
||||
void _add_heartbeat_peer(int p);
|
||||
void _remove_heartbeat_peer(int p);
|
||||
|
@@ -3246,6 +3246,11 @@ bool PG::sched_scrub()
|
||||
bool time_for_deep = (ceph_clock_now(cct) >=
|
||||
info.history.last_deep_scrub_stamp + cct->_conf->osd_deep_scrub_interval);
|
||||
|
||||
bool deep_coin_flip = (rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100;
|
||||
dout(20) << __func__ << ": time_for_deep=" << time_for_deep << " deep_coin_flip=" << deep_coin_flip << dendl;
|
||||
|
||||
time_for_deep = (time_for_deep || deep_coin_flip);
|
||||
|
||||
//NODEEP_SCRUB so ignore time initiated deep-scrub
|
||||
if (osd->osd->get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
|
||||
pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB))
|
||||
|
Loading…
Reference in New Issue
Block a user