Merge pull request from renhwztetecs/renhw-wip-mds-balancer

mds/MDBalancer: cleanup

Reviewed-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2016-08-05 12:01:12 +01:00 committed by GitHub
commit 8aeec4cf0d
3 changed files with 39 additions and 72 deletions

View File

@ -475,7 +475,7 @@ OPTION(mds_log_events_per_segment, OPT_INT, 1024)
OPTION(mds_log_segment_size, OPT_INT, 0) // segment size for mds log, default to default file_layout_t
OPTION(mds_log_max_segments, OPT_U32, 30)
OPTION(mds_log_max_expiring, OPT_INT, 20)
OPTION(mds_bal_sample_interval, OPT_FLOAT, 3.0) // every 5 seconds
OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds
OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000)
OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0)
OPTION(mds_bal_frag, OPT_BOOL, false)

View File

@ -162,7 +162,9 @@ mds_load_t MDBalancer::get_load(utime_t now)
ifstream cpu("/proc/loadavg");
if (cpu.is_open())
cpu >> load.cpu_load_avg;
else
derr << "input file '/proc/loadavg' not found" << dendl;
dout(15) << "get_load " << load << dendl;
return load;
}
@ -456,7 +458,7 @@ void MDBalancer::prep_rebalance(int beat)
<< dendl;
}
double total_load = 0;
double total_load = 0.0;
multimap<double,mds_rank_t> load_map;
for (mds_rank_t i=mds_rank_t(0); i < mds_rank_t(cluster_size); i++) {
map<mds_rank_t, mds_load_t>::value_type val(i, mds_load_t(ceph_clock_now(g_ceph_context)));
@ -552,40 +554,36 @@ void MDBalancer::prep_rebalance(int beat)
}
}
if (1) {
if (beat % 2 == 1) {
// old way
dout(15) << " matching big exporters to big importers" << dendl;
// big exporters to big importers
multimap<double,mds_rank_t>::reverse_iterator ex = exporters.rbegin();
multimap<double,mds_rank_t>::iterator im = importers.begin();
while (ex != exporters.rend() &&
im != importers.end()) {
double maxex = get_maxex(ex->second);
double maxim = get_maxim(im->second);
if (maxex < .001 || maxim < .001) break;
try_match(ex->second, maxex,
im->second, maxim);
if (maxex <= .001) ++ex;
if (maxim <= .001) ++im;
}
} else {
// new way
dout(15) << " matching small exporters to big importers" << dendl;
// small exporters to big importers
multimap<double,mds_rank_t>::iterator ex = exporters.begin();
multimap<double,mds_rank_t>::iterator im = importers.begin();
while (ex != exporters.end() &&
im != importers.end()) {
double maxex = get_maxex(ex->second);
double maxim = get_maxim(im->second);
if (maxex < .001 || maxim < .001) break;
try_match(ex->second, maxex,
im->second, maxim);
if (maxex <= .001) ++ex;
if (maxim <= .001) ++im;
}
// old way
if (beat % 2 == 1) {
dout(15) << " matching big exporters to big importers" << dendl;
// big exporters to big importers
multimap<double,mds_rank_t>::reverse_iterator ex = exporters.rbegin();
multimap<double,mds_rank_t>::iterator im = importers.begin();
while (ex != exporters.rend() &&
im != importers.end()) {
double maxex = get_maxex(ex->second);
double maxim = get_maxim(im->second);
if (maxex < .001 || maxim < .001) break;
try_match(ex->second, maxex,
im->second, maxim);
if (maxex <= .001) ++ex;
if (maxim <= .001) ++im;
}
} else { // new way
dout(15) << " matching small exporters to big importers" << dendl;
// small exporters to big importers
multimap<double,mds_rank_t>::iterator ex = exporters.begin();
multimap<double,mds_rank_t>::iterator im = importers.begin();
while (ex != exporters.end() &&
im != importers.end()) {
double maxex = get_maxex(ex->second);
double maxim = get_maxim(im->second);
if (maxex < .001 || maxim < .001) break;
try_match(ex->second, maxex,
im->second, maxim);
if (maxex <= .001) ++ex;
if (maxim <= .001) ++im;
}
}
}
@ -653,7 +651,7 @@ void MDBalancer::try_rebalance()
//<< " .. " << (*it).second << " * " << load_fac
<< " -> " << amount
<< dendl;//" .. fudge is " << fudge << dendl;
double have = 0;
double have = 0.0;
show_imports();
@ -936,43 +934,12 @@ void MDBalancer::hit_inode(utime_t now, CInode *in, int type, int who)
if (in->get_parent_dn())
hit_dir(now, in->get_parent_dn()->get_dir(), type, who);
}
/*
// hit me
in->popularity[MDS_POP_JUSTME].pop[type].hit(now);
in->popularity[MDS_POP_NESTED].pop[type].hit(now);
if (in->is_auth()) {
in->popularity[MDS_POP_CURDOM].pop[type].hit(now);
in->popularity[MDS_POP_ANYDOM].pop[type].hit(now);
dout(20) << "hit_inode " << type << " pop "
<< in->popularity[MDS_POP_JUSTME].pop[type].get(now) << " me, "
<< in->popularity[MDS_POP_NESTED].pop[type].get(now) << " nested, "
<< in->popularity[MDS_POP_CURDOM].pop[type].get(now) << " curdom, "
<< in->popularity[MDS_POP_CURDOM].pop[type].get(now) << " anydom"
<< " on " << *in
<< dendl;
} else {
dout(20) << "hit_inode " << type << " pop "
<< in->popularity[MDS_POP_JUSTME].pop[type].get(now) << " me, "
<< in->popularity[MDS_POP_NESTED].pop[type].get(now) << " nested, "
<< " on " << *in
<< dendl;
}
// hit auth up to import
CDir *dir = in->get_parent_dir();
if (dir) hit_dir(now, dir, type);
*/
void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, int who, double amount)
{
// hit me
double v = dir->pop_me.get(type).hit(now, amount);
//if (dir->ino() == inodeno_t(0x10000000000))
//dout(0) << "hit_dir " << type << " pop " << v << " in " << *dir << dendl;
// split/merge
if (g_conf->mds_bal_frag && g_conf->mds_bal_fragment_interval > 0 &&
!dir->inode->is_base() && // not root/base (for now at least)
@ -1005,7 +972,7 @@ void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, int who, double amoun
dir->pop_spread.hit(now, mds->mdcache->decayrate, who);
}
double rd_adj = 0;
double rd_adj = 0.0;
if (type == META_POP_IRD &&
dir->last_popularity_sample < last_sample) {
double dir_pop = dir->pop_auth_subtree.get(type).get(now, mds->mdcache->decayrate); // hmm??
@ -1057,7 +1024,7 @@ void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, int who, double amoun
bool hit_subtree = dir->is_auth(); // current auth subtree (if any)
bool hit_subtree_nested = dir->is_auth(); // all nested auth subtrees
while (1) {
while (true) {
dir->pop_nested.get(type).hit(now, amount);
if (rd_adj != 0.0)
dir->pop_nested.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj);

View File

@ -55,7 +55,7 @@ class MDBalancer {
// per-epoch scatter/gathered info
map<mds_rank_t, mds_load_t> mds_load;
map<mds_rank_t, float> mds_meta_load;
map<mds_rank_t, double> mds_meta_load;
map<mds_rank_t, map<mds_rank_t, float> > mds_import_map;
// per-epoch state