Merge remote branch 'origin/overload2' into unstable

This commit is contained in:
Sage Weil 2011-01-15 15:52:36 -08:00
commit 8aa4a95768
4 changed files with 147 additions and 30 deletions

View File

@ -146,6 +146,80 @@ void OSDMonitor::remove_redundant_pg_temp()
}
}
/* Assign a lower weight to overloaded OSDs.
 *
 * An OSD counts as overloaded when its utilization (kb_used / kb) is at
 * least 'oload' percent of the average utilization computed from
 * pg_map.osd_sum.
 *
 * oload:   threshold, as a percentage of the average utilization; must be
 *          greater than 100.
 * out_str: receives a human-readable report: the error text on failure,
 *          or the averages and the list of reweighted OSDs on success.
 *
 * Returns 0 on success, -EINVAL if oload <= 100, or -EDOM if the summed
 * OSD stats report too little total or used space for the average to be
 * meaningful.
 */
int OSDMonitor::reweight_by_utilization(int oload, std::string& out_str)
{
  if (oload <= 100) {
    ostringstream oss;
    oss << "You must give a percentage higher than 100. "
      "The reweighting threshold will be calculated as <average-utilization> "
      "times <input-percentage>. For example, an argument of 200 would "
      "reweight OSDs which are twice as utilized as the average OSD.\n";
    out_str = oss.str();
    dout(0) << "reweight_by_utilization: " << out_str << dendl;
    return -EINVAL;
  }

  // Avoid putting a small number (or 0) in the denominator when calculating
  // average_full
  const PGMap &pgm = mon->pgmon()->pg_map;
  if (pgm.osd_sum.kb < 1024) {
    ostringstream oss;
    oss << "Refusing to reweight: we only have " << pgm.osd_sum.kb << " kb "
      "across all osds!\n";
    out_str = oss.str();
    dout(0) << "reweight_by_utilization: " << out_str << dendl;
    return -EDOM;
  }

  if (pgm.osd_sum.kb_used < 5 * 1024) {
    ostringstream oss;
    oss << "Refusing to reweight: we only have " << pgm.osd_sum.kb_used
        << " kb used across all osds!\n";
    out_str = oss.str();
    dout(0) << "reweight_by_utilization: " << out_str << dendl;
    return -EDOM;
  }

  // Average utilization, and the utilization at which an OSD is considered
  // overloaded (average scaled by oload percent).
  float average_full = pgm.osd_sum.kb_used;
  average_full /= pgm.osd_sum.kb;
  float overload_full = average_full;
  overload_full *= oload;
  overload_full /= 100.0;

  // Remaining fraction between the threshold and completely full; this is
  // the denominator of the new weight. It is <= 0 when the threshold sits
  // at or beyond 100% utilization.
  const float headroom = 1.0f - overload_full;

  ostringstream oss;
  char buf[128];
  snprintf(buf, sizeof(buf), "average_full: %04f, overload_full: %04f. ",
           average_full, overload_full);
  oss << buf;

  std::string sep;  // empty until the first overloaded OSD has been reported
  oss << "overloaded osds: ";
  for (hash_map<int,osd_stat_t>::const_iterator p = pgm.osd_stat.begin();
       p != pgm.osd_stat.end();
       ++p) {
    // An OSD reporting no capacity has no defined utilization; dividing by
    // zero would produce inf/NaN and a bogus weight.
    if (p->second.kb == 0)
      continue;

    float full = p->second.kb_used;
    full /= p->second.kb;
    if (full < overload_full)
      continue;

    // Scale the weight down linearly: 1.0 at the threshold, 0.0 when the
    // OSD is completely full. With no headroom left the OSD is already
    // full, so it gets weight 0 rather than the result of a 0/0 division.
    float new_weight = 0.0f;
    if (headroom > 0.0f)
      new_weight = (1.0f - full) / headroom;
    if (new_weight < 0.0f)
      new_weight = 0.0f;  // kb_used > kb would otherwise go negative

    // NOTE(review): this writes to osdmap directly; confirm whether the
    // change should instead be staged in a pending incremental.
    osdmap.set_weightf(p->first, new_weight);

    snprintf(buf, sizeof(buf), "%d [%04f]", p->first, new_weight);
    oss << sep << buf;
    sep = ", ";  // only entries after the first are comma-separated
  }
  if (sep.empty()) {
    oss << "(none)";
  }

  out_str = oss.str();
  dout(0) << "reweight_by_utilization: finished with " << out_str << dendl;
  return 0;
}
void OSDMonitor::create_pending()
{
@ -1632,6 +1706,21 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
goto out;
}
}
else if ((m->cmd.size() > 1) &&
         (m->cmd[1] == "reweight-by-utilization")) {
  // Threshold as a percentage of the average utilization.
  // reweight_by_utilization() rejects any value <= 100, so the default
  // used when no argument is given must be above 100 or the bare command
  // could never succeed.
  int oload = 120;
  if (m->cmd.size() > 2) {
    // A non-numeric argument parses as 0 and is rejected by the callee.
    oload = atoi(m->cmd[2].c_str());
  }
  string out_str;
  err = reweight_by_utilization(oload, out_str);
  if (err) {
    ss << "FAILED to reweight-by-utilization: " << out_str;
  }
  else {
    ss << "SUCCESSFUL reweight-by-utilization: " << out_str;
  }
}
else {
ss << "unknown command " << m->cmd[1];
}

View File

@ -72,7 +72,7 @@ private:
void send_incremental(epoch_t first, entity_inst_t& dest);
void remove_redundant_pg_temp();
int reweight_by_utilization(int oload, std::string& out_str);
bool preprocess_failure(class MOSDFailure *m);
bool prepare_failure(class MOSDFailure *m);

View File

@ -73,26 +73,43 @@ public:
Incremental() : version(0), osdmap_epoch(0), pg_scan(0) {}
};
void apply_incremental(Incremental& inc) {
void apply_incremental(const Incremental& inc) {
assert(inc.version == version+1);
version++;
for (map<pg_t,pg_stat_t>::iterator p = inc.pg_stat_updates.begin();
for (map<pg_t,pg_stat_t>::const_iterator p = inc.pg_stat_updates.begin();
p != inc.pg_stat_updates.end();
++p) {
if (pg_stat.count(p->first))
stat_pg_sub(p->first, pg_stat[p->first]);
if (pg_stat.count(p->first) == 0)
pg_set.insert(p->first);
pg_stat[p->first] = p->second;
stat_pg_add(p->first, p->second);
const pg_t &update_pg(p->first);
const pg_stat_t &update_stat(p->second);
hash_map<pg_t,pg_stat_t>::iterator t = pg_stat.find(update_pg);
if (t == pg_stat.end()) {
hash_map<pg_t,pg_stat_t>::value_type v(update_pg, update_stat);
pg_stat.insert(v);
}
else {
stat_pg_sub(update_pg, t->second);
t->second = update_stat;
}
stat_pg_add(update_pg, update_stat);
}
for (map<int,osd_stat_t>::iterator p = inc.osd_stat_updates.begin();
for (map<int,osd_stat_t>::const_iterator p = inc.osd_stat_updates.begin();
p != inc.osd_stat_updates.end();
++p) {
if (osd_stat.count(p->first))
stat_osd_sub(osd_stat[p->first]);
osd_stat[p->first] = p->second;
stat_osd_add(p->second);
int osd = p->first;
const osd_stat_t &new_stats(p->second);
hash_map<int,osd_stat_t>::iterator t = osd_stat.find(osd);
if (t == osd_stat.end()) {
hash_map<int,osd_stat_t>::value_type v(osd, new_stats);
osd_stat.insert(v);
}
else {
stat_osd_sub(t->second);
t->second = new_stats;
}
stat_osd_add(new_stats);
//update the full/nearful_osd sets
int from = p->first;
float ratio = ((float)p->second.kb_used) / (float) p->second.kb;
@ -109,23 +126,30 @@ public:
nearfull_osds.erase(from);
}
}
for (set<pg_t>::iterator p = inc.pg_remove.begin();
for (set<pg_t>::const_iterator p = inc.pg_remove.begin();
p != inc.pg_remove.end();
p++) {
if (pg_set.count(*p)) {
pg_set.erase(*p);
stat_pg_sub(*p, pg_stat[*p]);
pg_stat.erase(*p);
const pg_t &removed_pg(*p);
set<pg_t>::iterator t = pg_set.find(removed_pg);
if (t != pg_set.end()) {
hash_map<pg_t,pg_stat_t>::iterator s = pg_stat.find(removed_pg);
if (s != pg_stat.end()) {
stat_pg_sub(removed_pg, s->second);
pg_stat.erase(s);
}
pg_set.erase(t);
}
}
for (set<int>::iterator p = inc.osd_stat_rm.begin();
p != inc.osd_stat_rm.end();
p++)
if (osd_stat.count(*p)) {
stat_osd_sub(osd_stat[*p]);
osd_stat.erase(*p);
p++) {
hash_map<int,osd_stat_t>::iterator t = osd_stat.find(*p);
if (t != osd_stat.end()) {
stat_osd_sub(t->second);
osd_stat.erase(t);
}
}
if (inc.osdmap_epoch)
last_osdmap_epoch = inc.osdmap_epoch;
@ -153,7 +177,7 @@ public:
pg_sum = pool_stat_t();
osd_sum = osd_stat_t();
}
void stat_pg_add(pg_t pgid, pg_stat_t &s) {
void stat_pg_add(const pg_t &pgid, const pg_stat_t &s) {
num_pg++;
num_pg_by_state[s.state]++;
pg_pool_sum[pgid.pool()].add(s);
@ -161,7 +185,7 @@ public:
if (s.state & PG_STATE_CREATING)
creating_pgs.insert(pgid);
}
void stat_pg_sub(pg_t pgid, pg_stat_t &s) {
void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s) {
num_pg--;
if (--num_pg_by_state[s.state] == 0)
num_pg_by_state.erase(s.state);
@ -170,11 +194,11 @@ public:
if (s.state & PG_STATE_CREATING)
creating_pgs.erase(pgid);
}
void stat_osd_add(osd_stat_t &s) {
void stat_osd_add(const osd_stat_t &s) {
num_osd++;
osd_sum.add(s);
}
void stat_osd_sub(osd_stat_t &s) {
void stat_osd_sub(const osd_stat_t &s) {
num_osd--;
osd_sum.sub(s);
}

View File

@ -403,14 +403,18 @@ private:
// Weight of OSD 'o' as a float: the value from get_weight() divided by
// CEPH_OSD_IN.
float get_weightf(int o) const {
  const float raw = static_cast<float>(get_weight(o));
  const float full_scale = static_cast<float>(CEPH_OSD_IN);
  return raw / full_scale;
}
void adjust_osd_weights(map<int,double>& weights, Incremental& inc) {
void adjust_osd_weights(const map<int,double>& weights, Incremental& inc) const {
float max = 0;
for (map<int,double>::iterator p = weights.begin(); p != weights.end(); p++)
for (map<int,double>::const_iterator p = weights.begin();
p != weights.end(); ++p) {
if (p->second > max)
max = p->second;
}
for (map<int,double>::iterator p = weights.begin(); p != weights.end(); p++)
for (map<int,double>::const_iterator p = weights.begin();
p != weights.end(); ++p) {
inc.new_weight[p->first] = (unsigned)((p->second / max) * CEPH_OSD_IN);
}
}