PGMap,PGMonitor: maintain mapping of osd to recent stat epoch

Also, osd_stat will be empty for out osds.

When an osd is marked out, rather than removing it from osd_stat,
we instead zero out its structure.

This patch also makes osd_stat_updates and osd_stat_rm private.
This should make it simpler to enforce invariants on these
mappings.

Each up osd will have a mapping since out osds are now included as
empty stats.

Signed-off-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
Samuel Just 2013-10-04 12:29:26 -07:00
parent 399f1d53f7
commit 091809b814
3 changed files with 87 additions and 19 deletions

View File

@ -30,7 +30,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
return;
}
ENCODE_START(6, 5, bl);
ENCODE_START(7, 5, bl);
::encode(version, bl);
::encode(pg_stat_updates, bl);
::encode(osd_stat_updates, bl);
@ -41,6 +41,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
::encode(nearfull_ratio, bl);
::encode(pg_remove, bl);
::encode(stamp, bl);
::encode(osd_epochs, bl);
ENCODE_FINISH(bl);
}
@ -89,6 +90,17 @@ void PGMap::Incremental::decode(bufferlist::iterator &bl)
}
if (struct_v >= 6)
::decode(stamp, bl);
if (struct_v >= 7) {
::decode(osd_epochs, bl);
} else {
for (map<int32_t, osd_stat_t>::iterator i = osd_stat_updates.begin();
i != osd_stat_updates.end();
++i) {
// This isn't accurate, but will cause trimming to behave like
// previously.
osd_epochs.insert(make_pair(i->first, osdmap_epoch));
}
}
DECODE_FINISH(bl);
}
@ -195,8 +207,10 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
}
stat_pg_add(update_pg, update_stat);
}
for (map<int32_t,osd_stat_t>::const_iterator p = inc.osd_stat_updates.begin();
p != inc.osd_stat_updates.end();
assert(osd_stat.size() == osd_epochs.size());
for (map<int32_t,osd_stat_t>::const_iterator p =
inc.get_osd_stat_updates().begin();
p != inc.get_osd_stat_updates().end();
++p) {
int osd = p->first;
const osd_stat_t &new_stats(p->second);
@ -209,6 +223,8 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stat_osd_sub(t->second);
t->second = new_stats;
}
assert(inc.get_osd_epochs().find(osd) != inc.get_osd_epochs().end());
osd_epochs.insert(*(inc.get_osd_epochs().find(osd)));
stat_osd_add(new_stats);
@ -226,8 +242,8 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
}
}
for (set<int>::iterator p = inc.osd_stat_rm.begin();
p != inc.osd_stat_rm.end();
for (set<int>::iterator p = inc.get_osd_stat_rm().begin();
p != inc.get_osd_stat_rm().end();
++p) {
hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(*p);
if (t != osd_stat.end()) {
@ -434,7 +450,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
return;
}
ENCODE_START(5, 4, bl);
ENCODE_START(6, 4, bl);
::encode(version, bl);
::encode(pg_stat, bl);
::encode(osd_stat, bl);
@ -443,6 +459,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
::encode(full_ratio, bl);
::encode(nearfull_ratio, bl);
::encode(stamp, bl);
::encode(osd_epochs, bl);
ENCODE_FINISH(bl);
}
@ -472,6 +489,17 @@ void PGMap::decode(bufferlist::iterator &bl)
}
if (struct_v >= 5)
::decode(stamp, bl);
if (struct_v >= 6) {
::decode(osd_epochs, bl);
} else {
for (hash_map<int32_t, osd_stat_t>::iterator i = osd_stat.begin();
i != osd_stat.end();
++i) {
// This isn't accurate, but will cause trimming to behave like
// previously.
osd_epochs.insert(make_pair(i->first, last_osdmap_epoch));
}
}
DECODE_FINISH(bl);
calc_stats();
@ -488,7 +516,10 @@ void PGMap::dirty_all(Incremental& inc)
inc.pg_stat_updates[p->first] = p->second;
}
for (hash_map<int32_t, osd_stat_t>::const_iterator p = osd_stat.begin(); p != osd_stat.end(); ++p) {
inc.osd_stat_updates[p->first] = p->second;
assert(inc.get_osd_epochs().count(p->first));
inc.update_stat(p->first,
inc.get_osd_epochs().find(p->first)->second,
p->second);
}
}

View File

@ -43,12 +43,13 @@ public:
float full_ratio;
float nearfull_ratio;
// mapping of osd to most recently reported osdmap epoch
hash_map<int32_t,epoch_t> osd_epochs;
class Incremental {
public:
version_t version;
map<pg_t,pg_stat_t> pg_stat_updates;
map<int32_t,osd_stat_t> osd_stat_updates;
set<int32_t> osd_stat_rm;
epoch_t osdmap_epoch;
epoch_t pg_scan; // osdmap epoch
set<pg_t> pg_remove;
@ -56,6 +57,38 @@ public:
float nearfull_ratio;
utime_t stamp;
private:
map<int32_t,osd_stat_t> osd_stat_updates;
set<int32_t> osd_stat_rm;
// mapping of osd to most recently reported osdmap epoch
map<int32_t,epoch_t> osd_epochs;
public:
// Read-only access to the pending per-osd stat updates, keyed by osd id.
// The map itself is private; it is modified through update_stat(),
// stat_osd_out() and rm_stat().
const map<int32_t, osd_stat_t> &get_osd_stat_updates() const {
return osd_stat_updates;
}
// Read-only access to the set of osd ids whose stats are pending removal
// (populated by rm_stat()).
const set<int32_t> &get_osd_stat_rm() const {
return osd_stat_rm;
}
// Read-only access to the pending mapping of osd id to the osdmap epoch
// recorded alongside that osd's stat update.
const map<int32_t, epoch_t> &get_osd_epochs() const {
return osd_epochs;
}
/**
 * Queue a stat update for an osd together with the epoch it was reported at.
 *
 * Both private maps are written for the same key, replacing any value that
 * was already pending for this osd.
 *
 * @param osd    id of the reporting osd
 * @param epoch  epoch to record for this osd
 * @param stat   stats to record for this osd
 */
void update_stat(int32_t osd, epoch_t epoch, const osd_stat_t &stat) {
  osd_epochs[osd] = epoch;
  osd_stat_updates[osd] = stat;
  // Invariant: every pending stat update has a matching epoch entry.
  assert(osd_stat_updates.size() == osd_epochs.size());
}
void stat_osd_out(int32_t osd) {
// 0 the stats for the osd
osd_stat_updates[osd] = osd_stat_t();
}
/**
 * Queue the removal of an osd's stats.
 *
 * Any stat update and epoch pending for the osd are dropped, and the osd id
 * is added to the removal set.
 *
 * @param osd  id of the osd whose stats should be removed
 */
void rm_stat(int32_t osd) {
  // Drop the pending update and its epoch together so the two maps stay
  // paired.
  osd_stat_updates.erase(osd);
  osd_epochs.erase(osd);
  osd_stat_rm.insert(osd);
}
void encode(bufferlist &bl, uint64_t features=-1) const;
void decode(bufferlist::iterator &bl);
void dump(Formatter *f) const;

View File

@ -494,15 +494,19 @@ void PGMonitor::encode_pending(MonitorDBStore::Transaction *t)
{
bufferlist dirty;
string prefix = pgmap_osd_prefix;
for (map<int32_t,osd_stat_t>::const_iterator p = pending_inc.osd_stat_updates.begin();
p != pending_inc.osd_stat_updates.end();
for (map<int32_t,osd_stat_t>::const_iterator p =
pending_inc.get_osd_stat_updates().begin();
p != pending_inc.get_osd_stat_updates().end();
++p) {
::encode(p->first, dirty);
bufferlist bl;
::encode(p->second, bl, features);
t->put(prefix, stringify(p->first), bl);
}
for (set<int32_t>::const_iterator p = pending_inc.osd_stat_rm.begin(); p != pending_inc.osd_stat_rm.end(); ++p) {
for (set<int32_t>::const_iterator p =
pending_inc.get_osd_stat_rm().begin();
p != pending_inc.get_osd_stat_rm().end();
++p) {
::encode(*p, dirty);
t->erase(prefix, stringify(*p));
}
@ -725,7 +729,11 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats)
}
// osd stat
pending_inc.osd_stat_updates[from] = stats->osd_stat;
if (mon->osdmon()->osdmap.is_in(from)) {
pending_inc.update_stat(from, stats->epoch, stats->osd_stat);
} else {
pending_inc.update_stat(from, stats->epoch, osd_stat_t());
}
if (pg_map.osd_stat.count(from))
dout(10) << " got osd." << from << " " << stats->osd_stat << " (was " << pg_map.osd_stat[from] << ")" << dendl;
@ -842,11 +850,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
++p)
if (p->second == CEPH_OSD_OUT) {
dout(10) << "check_osd_map osd." << p->first << " went OUT" << dendl;
pending_inc.osd_stat_rm.insert(p->first);
} else {
dout(10) << "check_osd_map osd." << p->first << " is IN" << dendl;
pending_inc.osd_stat_rm.erase(p->first);
pending_inc.osd_stat_updates[p->first];
pending_inc.stat_osd_out(p->first);
}
// this is conservative: we want to know if any osds (maybe) got marked down.
@ -867,7 +871,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
// whether it was created *or* destroyed, we can safely drop
// it's osd_stat_t record.
dout(10) << "check_osd_map osd." << p->first << " created or destroyed" << dendl;
pending_inc.osd_stat_rm.insert(p->first);
pending_inc.rm_stat(p->first);
// and adjust full, nearfull set
pg_map.nearfull_osds.erase(p->first);