mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
PGMap,PGMonitor: maintain mapping of osd to recent stat epoch
Also, osd_stat will be empty for out osds. When an osd is marked out, rather than removing it from osd_stat, we instead zero out its structure. This patch also makes osd_stat_updates and osd_stat_rm private, which should make it simpler to enforce invariants on these mappings. Each up osd will have a mapping, since out osds are now included as empty stats. Signed-off-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
parent
399f1d53f7
commit
091809b814
@ -30,7 +30,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
|
||||
return;
|
||||
}
|
||||
|
||||
ENCODE_START(6, 5, bl);
|
||||
ENCODE_START(7, 5, bl);
|
||||
::encode(version, bl);
|
||||
::encode(pg_stat_updates, bl);
|
||||
::encode(osd_stat_updates, bl);
|
||||
@ -41,6 +41,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
|
||||
::encode(nearfull_ratio, bl);
|
||||
::encode(pg_remove, bl);
|
||||
::encode(stamp, bl);
|
||||
::encode(osd_epochs, bl);
|
||||
ENCODE_FINISH(bl);
|
||||
}
|
||||
|
||||
@ -89,6 +90,17 @@ void PGMap::Incremental::decode(bufferlist::iterator &bl)
|
||||
}
|
||||
if (struct_v >= 6)
|
||||
::decode(stamp, bl);
|
||||
if (struct_v >= 7) {
|
||||
::decode(osd_epochs, bl);
|
||||
} else {
|
||||
for (map<int32_t, osd_stat_t>::iterator i = osd_stat_updates.begin();
|
||||
i != osd_stat_updates.end();
|
||||
++i) {
|
||||
// This isn't accurate, but will cause trimming to behave like
|
||||
// previously.
|
||||
osd_epochs.insert(make_pair(i->first, osdmap_epoch));
|
||||
}
|
||||
}
|
||||
DECODE_FINISH(bl);
|
||||
}
|
||||
|
||||
@ -195,8 +207,10 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
|
||||
}
|
||||
stat_pg_add(update_pg, update_stat);
|
||||
}
|
||||
for (map<int32_t,osd_stat_t>::const_iterator p = inc.osd_stat_updates.begin();
|
||||
p != inc.osd_stat_updates.end();
|
||||
assert(osd_stat.size() == osd_epochs.size());
|
||||
for (map<int32_t,osd_stat_t>::const_iterator p =
|
||||
inc.get_osd_stat_updates().begin();
|
||||
p != inc.get_osd_stat_updates().end();
|
||||
++p) {
|
||||
int osd = p->first;
|
||||
const osd_stat_t &new_stats(p->second);
|
||||
@ -209,6 +223,8 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
|
||||
stat_osd_sub(t->second);
|
||||
t->second = new_stats;
|
||||
}
|
||||
assert(inc.get_osd_epochs().find(osd) != inc.get_osd_epochs().end());
|
||||
osd_epochs.insert(*(inc.get_osd_epochs().find(osd)));
|
||||
|
||||
stat_osd_add(new_stats);
|
||||
|
||||
@ -226,8 +242,8 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
|
||||
}
|
||||
}
|
||||
|
||||
for (set<int>::iterator p = inc.osd_stat_rm.begin();
|
||||
p != inc.osd_stat_rm.end();
|
||||
for (set<int>::iterator p = inc.get_osd_stat_rm().begin();
|
||||
p != inc.get_osd_stat_rm().end();
|
||||
++p) {
|
||||
hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(*p);
|
||||
if (t != osd_stat.end()) {
|
||||
@ -434,7 +450,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
|
||||
return;
|
||||
}
|
||||
|
||||
ENCODE_START(5, 4, bl);
|
||||
ENCODE_START(6, 4, bl);
|
||||
::encode(version, bl);
|
||||
::encode(pg_stat, bl);
|
||||
::encode(osd_stat, bl);
|
||||
@ -443,6 +459,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
|
||||
::encode(full_ratio, bl);
|
||||
::encode(nearfull_ratio, bl);
|
||||
::encode(stamp, bl);
|
||||
::encode(osd_epochs, bl);
|
||||
ENCODE_FINISH(bl);
|
||||
}
|
||||
|
||||
@ -472,6 +489,17 @@ void PGMap::decode(bufferlist::iterator &bl)
|
||||
}
|
||||
if (struct_v >= 5)
|
||||
::decode(stamp, bl);
|
||||
if (struct_v >= 6) {
|
||||
::decode(osd_epochs, bl);
|
||||
} else {
|
||||
for (hash_map<int32_t, osd_stat_t>::iterator i = osd_stat.begin();
|
||||
i != osd_stat.end();
|
||||
++i) {
|
||||
// This isn't accurate, but will cause trimming to behave like
|
||||
// previously.
|
||||
osd_epochs.insert(make_pair(i->first, last_osdmap_epoch));
|
||||
}
|
||||
}
|
||||
DECODE_FINISH(bl);
|
||||
|
||||
calc_stats();
|
||||
@ -488,7 +516,10 @@ void PGMap::dirty_all(Incremental& inc)
|
||||
inc.pg_stat_updates[p->first] = p->second;
|
||||
}
|
||||
for (hash_map<int32_t, osd_stat_t>::const_iterator p = osd_stat.begin(); p != osd_stat.end(); ++p) {
|
||||
inc.osd_stat_updates[p->first] = p->second;
|
||||
assert(inc.get_osd_epochs().count(p->first));
|
||||
inc.update_stat(p->first,
|
||||
inc.get_osd_epochs().find(p->first)->second,
|
||||
p->second);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,12 +43,13 @@ public:
|
||||
float full_ratio;
|
||||
float nearfull_ratio;
|
||||
|
||||
// mapping of osd to most recently reported osdmap epoch
|
||||
hash_map<int32_t,epoch_t> osd_epochs;
|
||||
|
||||
class Incremental {
|
||||
public:
|
||||
version_t version;
|
||||
map<pg_t,pg_stat_t> pg_stat_updates;
|
||||
map<int32_t,osd_stat_t> osd_stat_updates;
|
||||
set<int32_t> osd_stat_rm;
|
||||
epoch_t osdmap_epoch;
|
||||
epoch_t pg_scan; // osdmap epoch
|
||||
set<pg_t> pg_remove;
|
||||
@ -56,6 +57,38 @@ public:
|
||||
float nearfull_ratio;
|
||||
utime_t stamp;
|
||||
|
||||
private:
|
||||
map<int32_t,osd_stat_t> osd_stat_updates;
|
||||
set<int32_t> osd_stat_rm;
|
||||
|
||||
// mapping of osd to most recently reported osdmap epoch
|
||||
map<int32_t,epoch_t> osd_epochs;
|
||||
public:
|
||||
|
||||
// Read-only view of the pending per-osd stat updates (osd id -> osd_stat_t).
const map<int32_t, osd_stat_t> &get_osd_stat_updates() const {
|
||||
return osd_stat_updates;
|
||||
}
|
||||
// Read-only view of the set of osd ids whose stats are pending removal.
const set<int32_t> &get_osd_stat_rm() const {
|
||||
return osd_stat_rm;
|
||||
}
|
||||
// Read-only view of the pending osd id -> reported osdmap epoch mapping.
const map<int32_t, epoch_t> &get_osd_epochs() const {
|
||||
return osd_epochs;
|
||||
}
|
||||
|
||||
// Record a stat report for `osd`: stores `stat` in osd_stat_updates and
// `epoch` in osd_epochs under the same key, overwriting any earlier entry
// for that osd in either map.
void update_stat(int32_t osd, epoch_t epoch, const osd_stat_t &stat) {
|
||||
osd_stat_updates[osd] = stat;
|
||||
osd_epochs[osd] = epoch;
|
||||
// Invariant check: both maps must hold the same number of entries.
assert(osd_epochs.size() == osd_stat_updates.size());
|
||||
}
|
||||
// Replace the pending stats for `osd` with a default-constructed osd_stat_t.
//
// NOTE(review): unlike update_stat(), this writes osd_stat_updates without
// touching osd_epochs. If `osd` has no osd_epochs entry yet, the two maps end
// up with different sizes, which the size assert in update_stat() would then
// trip on a later call. PGMap::apply_incremental also appears to assert that
// every osd in the stat updates has an epoch entry. Confirm that callers
// guarantee an epoch entry exists, or record an epoch here as well.
void stat_osd_out(int32_t osd) {
|
||||
// 0 the stats for the osd
|
||||
osd_stat_updates[osd] = osd_stat_t();
|
||||
}
|
||||
// Mark `osd` for removal: adds it to osd_stat_rm and erases any pending
// epoch and stat-update entries for it, so osd_epochs and osd_stat_updates
// drop the same key together.
void rm_stat(int32_t osd) {
|
||||
osd_stat_rm.insert(osd);
|
||||
osd_epochs.erase(osd);
|
||||
|
||||
osd_stat_updates.erase(osd);
|
||||
}
|
||||
void encode(bufferlist &bl, uint64_t features=-1) const;
|
||||
void decode(bufferlist::iterator &bl);
|
||||
void dump(Formatter *f) const;
|
||||
|
@ -494,15 +494,19 @@ void PGMonitor::encode_pending(MonitorDBStore::Transaction *t)
|
||||
{
|
||||
bufferlist dirty;
|
||||
string prefix = pgmap_osd_prefix;
|
||||
for (map<int32_t,osd_stat_t>::const_iterator p = pending_inc.osd_stat_updates.begin();
|
||||
p != pending_inc.osd_stat_updates.end();
|
||||
for (map<int32_t,osd_stat_t>::const_iterator p =
|
||||
pending_inc.get_osd_stat_updates().begin();
|
||||
p != pending_inc.get_osd_stat_updates().end();
|
||||
++p) {
|
||||
::encode(p->first, dirty);
|
||||
bufferlist bl;
|
||||
::encode(p->second, bl, features);
|
||||
t->put(prefix, stringify(p->first), bl);
|
||||
}
|
||||
for (set<int32_t>::const_iterator p = pending_inc.osd_stat_rm.begin(); p != pending_inc.osd_stat_rm.end(); ++p) {
|
||||
for (set<int32_t>::const_iterator p =
|
||||
pending_inc.get_osd_stat_rm().begin();
|
||||
p != pending_inc.get_osd_stat_rm().end();
|
||||
++p) {
|
||||
::encode(*p, dirty);
|
||||
t->erase(prefix, stringify(*p));
|
||||
}
|
||||
@ -725,7 +729,11 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats)
|
||||
}
|
||||
|
||||
// osd stat
|
||||
pending_inc.osd_stat_updates[from] = stats->osd_stat;
|
||||
if (mon->osdmon()->osdmap.is_in(from)) {
|
||||
pending_inc.update_stat(from, stats->epoch, stats->osd_stat);
|
||||
} else {
|
||||
pending_inc.update_stat(from, stats->epoch, osd_stat_t());
|
||||
}
|
||||
|
||||
if (pg_map.osd_stat.count(from))
|
||||
dout(10) << " got osd." << from << " " << stats->osd_stat << " (was " << pg_map.osd_stat[from] << ")" << dendl;
|
||||
@ -842,11 +850,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
|
||||
++p)
|
||||
if (p->second == CEPH_OSD_OUT) {
|
||||
dout(10) << "check_osd_map osd." << p->first << " went OUT" << dendl;
|
||||
pending_inc.osd_stat_rm.insert(p->first);
|
||||
} else {
|
||||
dout(10) << "check_osd_map osd." << p->first << " is IN" << dendl;
|
||||
pending_inc.osd_stat_rm.erase(p->first);
|
||||
pending_inc.osd_stat_updates[p->first];
|
||||
pending_inc.stat_osd_out(p->first);
|
||||
}
|
||||
|
||||
// this is conservative: we want to know if any osds (maybe) got marked down.
|
||||
@ -867,7 +871,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
|
||||
// whether it was created *or* destroyed, we can safely drop
|
||||
// it's osd_stat_t record.
|
||||
dout(10) << "check_osd_map osd." << p->first << " created or destroyed" << dendl;
|
||||
pending_inc.osd_stat_rm.insert(p->first);
|
||||
pending_inc.rm_stat(p->first);
|
||||
|
||||
// and adjust full, nearfull set
|
||||
pg_map.nearfull_osds.erase(p->first);
|
||||
|
Loading…
Reference in New Issue
Block a user