mon: refactor PGMap updating code for reuse in mgr

...and remove the copypasta from mgr.

mgr still doesn't do all the same logic (it doesn't
have osdmap history handy) but it is now much easier
to see which bits are used one place vs. the other.

Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2016-09-22 17:09:22 +01:00
parent 58dd3db0be
commit afa7078763
6 changed files with 336 additions and 526 deletions

View File

@ -99,239 +99,13 @@ void ClusterState::notify_osdmap(const OSDMap &osd_map)
PGMap::Incremental pending_inc;
pending_inc.version = pg_map.version + 1; // to make apply_incremental happy
_update_creating_pgs(osd_map, &pending_inc);
_register_new_pgs(osd_map, &pending_inc);
PGMapUpdater::update_creating_pgs(osd_map, &pg_map, &pending_inc);
PGMapUpdater::register_new_pgs(osd_map, &pg_map, &pending_inc);
pg_map.apply_incremental(g_ceph_context, pending_inc);
// TODO: Reinstate check_down_pgs logic?
}
void ClusterState::_register_new_pgs(
const OSDMap &osd_map,
PGMap::Incremental *pending_inc)
{
// iterate over crush mapspace
epoch_t epoch = osd_map.get_epoch();
dout(10) << "checking pg pools for osdmap epoch " << epoch
<< ", last_pg_scan " << pg_map.last_pg_scan << dendl;
int created = 0;
for (const auto & p : osd_map.pools) {
int64_t poolid = p.first;
const pg_pool_t &pool = p.second;
int ruleno = osd_map.crush->find_rule(pool.get_crush_ruleset(),
pool.get_type(), pool.get_size());
if (ruleno < 0 || !osd_map.crush->rule_exists(ruleno))
continue;
if (pool.get_last_change() <= pg_map.last_pg_scan ||
pool.get_last_change() <= pending_inc->pg_scan) {
dout(10) << " no change in pool " << poolid << " " << pool << dendl;
continue;
}
dout(10) << "scanning pool " << poolid
<< " " << pool << dendl;
// first pgs in this pool
bool new_pool = pg_map.pg_pool_sum.count(poolid) == 0;
for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) {
pg_t pgid(ps, poolid, -1);
if (pg_map.pg_stat.count(pgid)) {
dout(20) << "register_new_pgs have " << pgid << dendl;
continue;
}
created++;
_register_pg(osd_map, pgid, pool.get_last_change(), new_pool,
pending_inc);
}
}
int removed = 0;
for (const auto &p : pg_map.creating_pgs) {
if (p.preferred() >= 0) {
dout(20) << " removing creating_pg " << p
<< " because it is localized and obsolete" << dendl;
pending_inc->pg_remove.insert(p);
removed++;
}
if (!osd_map.have_pg_pool(p.pool())) {
dout(20) << " removing creating_pg " << p
<< " because containing pool deleted" << dendl;
pending_inc->pg_remove.insert(p);
++removed;
}
}
// deleted pools?
for (const auto & p : pg_map.pg_stat) {
if (!osd_map.have_pg_pool(p.first.pool())) {
dout(20) << " removing pg_stat " << p.first << " because "
<< "containing pool deleted" << dendl;
pending_inc->pg_remove.insert(p.first);
++removed;
}
if (p.first.preferred() >= 0) {
dout(20) << " removing localized pg " << p.first << dendl;
pending_inc->pg_remove.insert(p.first);
++removed;
}
}
// we don't want to redo this work if we can avoid it.
pending_inc->pg_scan = epoch;
dout(10) << "register_new_pgs registered " << created << " new pgs, removed "
<< removed << " uncreated pgs" << dendl;
}
void ClusterState::_register_pg(
const OSDMap &osd_map,
pg_t pgid, epoch_t epoch,
bool new_pool,
PGMap::Incremental *pending_inc)
{
pg_t parent;
int split_bits = 0;
bool parent_found = false;
if (!new_pool) {
parent = pgid;
while (1) {
// remove most significant bit
int msb = cbits(parent.ps());
if (!msb)
break;
parent.set_ps(parent.ps() & ~(1<<(msb-1)));
split_bits++;
dout(30) << " is " << pgid << " parent " << parent << " ?" << dendl;
if (pg_map.pg_stat.count(parent) &&
pg_map.pg_stat[parent].state != PG_STATE_CREATING) {
dout(10) << " parent is " << parent << dendl;
parent_found = true;
break;
}
}
}
pg_stat_t &stats = pending_inc->pg_stat_updates[pgid];
stats.state = PG_STATE_CREATING;
stats.created = epoch;
stats.parent = parent;
stats.parent_split_bits = split_bits;
stats.mapping_epoch = epoch;
if (parent_found) {
pg_stat_t &ps = pg_map.pg_stat[parent];
stats.last_fresh = ps.last_fresh;
stats.last_active = ps.last_active;
stats.last_change = ps.last_change;
stats.last_peered = ps.last_peered;
stats.last_clean = ps.last_clean;
stats.last_unstale = ps.last_unstale;
stats.last_undegraded = ps.last_undegraded;
stats.last_fullsized = ps.last_fullsized;
stats.last_scrub_stamp = ps.last_scrub_stamp;
stats.last_deep_scrub_stamp = ps.last_deep_scrub_stamp;
stats.last_clean_scrub_stamp = ps.last_clean_scrub_stamp;
} else {
utime_t now = ceph_clock_now(g_ceph_context);
stats.last_fresh = now;
stats.last_active = now;
stats.last_change = now;
stats.last_peered = now;
stats.last_clean = now;
stats.last_unstale = now;
stats.last_undegraded = now;
stats.last_fullsized = now;
stats.last_scrub_stamp = now;
stats.last_deep_scrub_stamp = now;
stats.last_clean_scrub_stamp = now;
}
osd_map.pg_to_up_acting_osds(
pgid,
&stats.up,
&stats.up_primary,
&stats.acting,
&stats.acting_primary);
if (split_bits == 0) {
dout(10) << " will create " << pgid
<< " primary " << stats.acting_primary
<< " acting " << stats.acting
<< dendl;
} else {
dout(10) << " will create " << pgid
<< " primary " << stats.acting_primary
<< " acting " << stats.acting
<< " parent " << parent
<< " by " << split_bits << " bits"
<< dendl;
}
}
// This was PGMonitor::map_pg_creates
void ClusterState::_update_creating_pgs(
const OSDMap &osd_map,
PGMap::Incremental *pending_inc)
{
assert(pending_inc != nullptr);
dout(10) << "to " << pg_map.creating_pgs.size()
<< " pgs, osdmap epoch " << osd_map.get_epoch()
<< dendl;
for (set<pg_t>::const_iterator p = pg_map.creating_pgs.begin();
p != pg_map.creating_pgs.end();
++p) {
pg_t pgid = *p;
pg_t on = pgid;
ceph::unordered_map<pg_t,pg_stat_t>::const_iterator q =
pg_map.pg_stat.find(pgid);
assert(q != pg_map.pg_stat.end());
const pg_stat_t *s = &q->second;
if (s->parent_split_bits)
on = s->parent;
vector<int> up, acting;
int up_primary, acting_primary;
osd_map.pg_to_up_acting_osds(
on,
&up,
&up_primary,
&acting,
&acting_primary);
if (up != s->up ||
up_primary != s->up_primary ||
acting != s->acting ||
acting_primary != s->acting_primary) {
pg_stat_t *ns = &pending_inc->pg_stat_updates[pgid];
dout(20) << pgid << " "
<< " acting_primary: " << s->acting_primary
<< " -> " << acting_primary
<< " acting: " << s->acting << " -> " << acting
<< " up_primary: " << s->up_primary << " -> " << up_primary
<< " up: " << s->up << " -> " << up
<< dendl;
// only initialize if it wasn't already a pending update
if (ns->reported_epoch == 0)
*ns = *s;
// note epoch if the target of the create message changed
if (acting_primary != ns->acting_primary)
ns->mapping_epoch = osd_map.get_epoch();
ns->up = up;
ns->up_primary = up_primary;
ns->acting = acting;
ns->acting_primary = acting_primary;
}
}
// TODO: Complete the separation of PG state handling so
// that a cut-down set of functionality remains in PGMonitor
// while the full-blown PGMap lives only here.
}

View File

@ -2070,3 +2070,288 @@ void PGMap::dump_object_stat_sum(TextTable &tbl, Formatter *f,
}
void PGMapUpdater::check_osd_map(const OSDMap::Incremental &osd_inc,
std::set<int> *need_check_down_pg_osds,
std::map<int,utime_t> *last_osd_report,
PGMap *pg_map,
PGMap::Incremental *pending_inc)
{
for (const auto &p : osd_inc.new_weight) {
if (p.second == CEPH_OSD_OUT) {
dout(10) << __func__ << " osd." << p.first << " went OUT" << dendl;
pending_inc->stat_osd_out(p.first);
}
}
// this is conservative: we want to know if any osds (maybe) got marked down.
for (const auto &p : osd_inc.new_state) {
if (p.second & CEPH_OSD_UP) { // true if marked up OR down,
// but we're too lazy to check
// which
need_check_down_pg_osds->insert(p.first);
// clear out the last_osd_report for this OSD
map<int, utime_t>::iterator report = last_osd_report->find(p.first);
if (report != last_osd_report->end()) {
last_osd_report->erase(report);
}
// clear out osd_stat slow request histogram
dout(20) << __func__ << " clearing osd." << p.first
<< " request histogram" << dendl;
pending_inc->stat_osd_down_up(p.first, *pg_map);
}
if (p.second & CEPH_OSD_EXISTS) {
// whether it was created *or* destroyed, we can safely drop
// it's osd_stat_t record.
dout(10) << __func__ << " osd." << p.first
<< " created or destroyed" << dendl;
pending_inc->rm_stat(p.first);
// and adjust full, nearfull set
pg_map->nearfull_osds.erase(p.first);
pg_map->full_osds.erase(p.first);
}
}
}
void PGMapUpdater::register_pg(
const OSDMap &osd_map,
pg_t pgid, epoch_t epoch,
bool new_pool,
PGMap *pg_map,
PGMap::Incremental *pending_inc)
{
pg_t parent;
int split_bits = 0;
bool parent_found = false;
if (!new_pool) {
parent = pgid;
while (1) {
// remove most significant bit
int msb = cbits(parent.ps());
if (!msb)
break;
parent.set_ps(parent.ps() & ~(1<<(msb-1)));
split_bits++;
dout(30) << " is " << pgid << " parent " << parent << " ?" << dendl;
if (pg_map->pg_stat.count(parent) &&
pg_map->pg_stat[parent].state != PG_STATE_CREATING) {
dout(10) << " parent is " << parent << dendl;
parent_found = true;
break;
}
}
}
pg_stat_t &stats = pending_inc->pg_stat_updates[pgid];
stats.state = PG_STATE_CREATING;
stats.created = epoch;
stats.parent = parent;
stats.parent_split_bits = split_bits;
stats.mapping_epoch = epoch;
if (parent_found) {
pg_stat_t &ps = pg_map->pg_stat[parent];
stats.last_fresh = ps.last_fresh;
stats.last_active = ps.last_active;
stats.last_change = ps.last_change;
stats.last_peered = ps.last_peered;
stats.last_clean = ps.last_clean;
stats.last_unstale = ps.last_unstale;
stats.last_undegraded = ps.last_undegraded;
stats.last_fullsized = ps.last_fullsized;
stats.last_scrub_stamp = ps.last_scrub_stamp;
stats.last_deep_scrub_stamp = ps.last_deep_scrub_stamp;
stats.last_clean_scrub_stamp = ps.last_clean_scrub_stamp;
} else {
utime_t now = ceph_clock_now(g_ceph_context);
stats.last_fresh = now;
stats.last_active = now;
stats.last_change = now;
stats.last_peered = now;
stats.last_clean = now;
stats.last_unstale = now;
stats.last_undegraded = now;
stats.last_fullsized = now;
stats.last_scrub_stamp = now;
stats.last_deep_scrub_stamp = now;
stats.last_clean_scrub_stamp = now;
}
osd_map.pg_to_up_acting_osds(
pgid,
&stats.up,
&stats.up_primary,
&stats.acting,
&stats.acting_primary);
if (split_bits == 0) {
dout(10) << __func__ << " will create " << pgid
<< " primary " << stats.acting_primary
<< " acting " << stats.acting
<< dendl;
} else {
dout(10) << __func__ << " will create " << pgid
<< " primary " << stats.acting_primary
<< " acting " << stats.acting
<< " parent " << parent
<< " by " << split_bits << " bits"
<< dendl;
}
}
void PGMapUpdater::register_new_pgs(
const OSDMap &osd_map,
PGMap *pg_map,
PGMap::Incremental *pending_inc)
{
epoch_t epoch = osd_map.get_epoch();
dout(10) << __func__ << " checking pg pools for osdmap epoch " << epoch
<< ", last_pg_scan " << pg_map->last_pg_scan << dendl;
int created = 0;
for (const auto &p : osd_map.pools) {
int64_t poolid = p.first;
const pg_pool_t &pool = p.second;
int ruleno = osd_map.crush->find_rule(pool.get_crush_ruleset(),
pool.get_type(), pool.get_size());
if (ruleno < 0 || !osd_map.crush->rule_exists(ruleno))
continue;
if (pool.get_last_change() <= pg_map->last_pg_scan ||
pool.get_last_change() <= pending_inc->pg_scan) {
dout(10) << " no change in pool " << poolid << " " << pool << dendl;
continue;
}
dout(10) << __func__ << " scanning pool " << poolid
<< " " << pool << dendl;
// first pgs in this pool
bool new_pool = pg_map->pg_pool_sum.count(poolid) == 0;
for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) {
pg_t pgid(ps, poolid, -1);
if (pg_map->pg_stat.count(pgid)) {
dout(20) << "register_new_pgs have " << pgid << dendl;
continue;
}
created++;
register_pg(osd_map, pgid, pool.get_last_change(), new_pool,
pg_map, pending_inc);
}
}
int removed = 0;
for (const auto &p : pg_map->creating_pgs) {
if (p.preferred() >= 0) {
dout(20) << " removing creating_pg " << p
<< " because it is localized and obsolete" << dendl;
pending_inc->pg_remove.insert(p);
removed++;
}
if (!osd_map.have_pg_pool(p.pool())) {
dout(20) << " removing creating_pg " << p
<< " because containing pool deleted" << dendl;
pending_inc->pg_remove.insert(p);
++removed;
}
}
// deleted pools?
for (const auto &p : pg_map->pg_stat) {
if (!osd_map.have_pg_pool(p.first.pool())) {
dout(20) << " removing pg_stat " << p.first << " because "
<< "containing pool deleted" << dendl;
pending_inc->pg_remove.insert(p.first);
++removed;
}
if (p.first.preferred() >= 0) {
dout(20) << " removing localized pg " << p.first << dendl;
pending_inc->pg_remove.insert(p.first);
++removed;
}
}
// we don't want to redo this work if we can avoid it.
pending_inc->pg_scan = epoch;
dout(10) << "register_new_pgs registered " << created << " new pgs, removed "
<< removed << " uncreated pgs" << dendl;
}
void PGMapUpdater::update_creating_pgs(
const OSDMap &osd_map,
PGMap *pg_map,
PGMap::Incremental *pending_inc)
{
dout(10) << __func__ << " to " << pg_map->creating_pgs.size()
<< " pgs, osdmap epoch " << osd_map.get_epoch()
<< dendl;
unsigned changed = 0;
for (set<pg_t>::const_iterator p = pg_map->creating_pgs.begin();
p != pg_map->creating_pgs.end();
++p) {
pg_t pgid = *p;
pg_t on = pgid;
ceph::unordered_map<pg_t,pg_stat_t>::const_iterator q =
pg_map->pg_stat.find(pgid);
assert(q != pg_map->pg_stat.end());
const pg_stat_t *s = &q->second;
if (s->parent_split_bits)
on = s->parent;
vector<int> up, acting;
int up_primary, acting_primary;
osd_map.pg_to_up_acting_osds(
on,
&up,
&up_primary,
&acting,
&acting_primary);
if (up != s->up ||
up_primary != s->up_primary ||
acting != s->acting ||
acting_primary != s->acting_primary) {
pg_stat_t *ns = &pending_inc->pg_stat_updates[pgid];
if (osd_map.get_epoch() > ns->reported_epoch) {
dout(20) << __func__ << " " << pgid << " "
<< " acting_primary: " << s->acting_primary
<< " -> " << acting_primary
<< " acting: " << s->acting << " -> " << acting
<< " up_primary: " << s->up_primary << " -> " << up_primary
<< " up: " << s->up << " -> " << up
<< dendl;
// only initialize if it wasn't already a pending update
if (ns->reported_epoch == 0)
*ns = *s;
// note epoch if the target of the create message changed
if (acting_primary != ns->acting_primary)
ns->mapping_epoch = osd_map.get_epoch();
ns->up = up;
ns->up_primary = up_primary;
ns->acting = acting;
ns->acting_primary = acting_primary;
++changed;
} else {
dout(20) << __func__ << " " << pgid << " has pending update from newer"
<< " epoch " << ns->reported_epoch
<< dendl;
}
}
}
if (changed) {
dout(10) << __func__ << " " << changed << " pgs changed primary" << dendl;
}
}

View File

@ -26,6 +26,10 @@
#include "osd/osd_types.h"
#include <sstream>
// FIXME: don't like including this here to get OSDMap::Incremental, maybe
// PGMapUpdater needs its own header.
#include "osd/OSDMap.h"
namespace ceph { class Formatter; }
class PGMap {
@ -382,4 +386,39 @@ inline ostream& operator<<(ostream& out, const PGMap& m) {
return out;
}
class PGMapUpdater
{
public:
static void check_osd_map(
const OSDMap::Incremental &osd_inc,
std::set<int> *need_check_down_pg_osds,
std::map<int,utime_t> *last_osd_report,
PGMap *pg_map,
PGMap::Incremental *pending_inc);
/**
* check latest osdmap for new pgs to register
*/
static void register_new_pgs(
const OSDMap &osd_map,
PGMap *pg_map,
PGMap::Incremental *pending_inc);
/**
* recalculate creating pg mappings
*/
static void update_creating_pgs(
const OSDMap &osd_map,
PGMap *pg_map,
PGMap::Incremental *pending_inc);
protected:
static void register_pg(
const OSDMap &osd_map,
pg_t pgid, epoch_t epoch,
bool new_pool,
PGMap *pg_map,
PGMap::Incremental *pending_inc);
};
#endif

View File

@ -67,7 +67,7 @@ void PGMonitor::on_restart()
void PGMonitor::on_active()
{
if (mon->is_leader()) {
check_osd_map(mon->osdmon()->osdmap.epoch);
check_osd_map(mon->osdmon()->osdmap.get_epoch());
need_check_down_pgs = true;
}
@ -911,54 +911,16 @@ void PGMonitor::check_osd_map(epoch_t epoch)
assert(bl.length());
OSDMap::Incremental inc(bl);
for (map<int32_t,uint32_t>::iterator p = inc.new_weight.begin();
p != inc.new_weight.end();
++p)
if (p->second == CEPH_OSD_OUT) {
dout(10) << __func__ << " osd." << p->first << " went OUT" << dendl;
pending_inc.stat_osd_out(p->first);
}
// this is conservative: we want to know if any osds (maybe) got marked down.
for (map<int32_t,uint8_t>::iterator p = inc.new_state.begin();
p != inc.new_state.end();
++p) {
if (p->second & CEPH_OSD_UP) { // true if marked up OR down,
// but we're too lazy to check
// which
need_check_down_pg_osds.insert(p->first);
// clear out the last_osd_report for this OSD
map<int, utime_t>::iterator report = last_osd_report.find(p->first);
if (report != last_osd_report.end()) {
last_osd_report.erase(report);
}
// clear out osd_stat slow request histogram
dout(20) << __func__ << " clearing osd." << p->first
<< " request histogram" << dendl;
pending_inc.stat_osd_down_up(p->first, pg_map);
}
if (p->second & CEPH_OSD_EXISTS) {
// whether it was created *or* destroyed, we can safely drop
// it's osd_stat_t record.
dout(10) << __func__ << " osd." << p->first
<< " created or destroyed" << dendl;
pending_inc.rm_stat(p->first);
// and adjust full, nearfull set
pg_map.nearfull_osds.erase(p->first);
pg_map.full_osds.erase(p->first);
}
}
PGMapUpdater::check_osd_map(inc, &need_check_down_pg_osds,
&last_osd_report, &pg_map, &pending_inc);
}
assert(pg_map.last_osdmap_epoch < epoch);
pending_inc.osdmap_epoch = epoch;
map_pg_creates();
register_new_pgs();
PGMapUpdater::update_creating_pgs(mon->osdmon()->osdmap,
&pg_map, &pending_inc);
PGMapUpdater::register_new_pgs(mon->osdmon()->osdmap, &pg_map, &pending_inc);
if (need_check_down_pgs || !need_check_down_pg_osds.empty())
check_down_pgs();
@ -966,244 +928,6 @@ void PGMonitor::check_osd_map(epoch_t epoch)
propose_pending();
}
void PGMonitor::register_pg(OSDMap *osdmap,
pg_pool_t& pool, pg_t pgid, epoch_t epoch,
bool new_pool)
{
pg_t parent;
int split_bits = 0;
bool parent_found = false;
if (!new_pool) {
parent = pgid;
while (1) {
// remove most significant bit
int msb = cbits(parent.ps());
if (!msb)
break;
parent.set_ps(parent.ps() & ~(1<<(msb-1)));
split_bits++;
dout(30) << " is " << pgid << " parent " << parent << " ?" << dendl;
if (pg_map.pg_stat.count(parent) &&
pg_map.pg_stat[parent].state != PG_STATE_CREATING) {
dout(10) << " parent is " << parent << dendl;
parent_found = true;
break;
}
}
}
pg_stat_t &stats = pending_inc.pg_stat_updates[pgid];
stats.state = PG_STATE_CREATING;
stats.created = epoch;
stats.parent = parent;
stats.parent_split_bits = split_bits;
stats.mapping_epoch = epoch;
if (parent_found) {
pg_stat_t &ps = pg_map.pg_stat[parent];
stats.last_fresh = ps.last_fresh;
stats.last_active = ps.last_active;
stats.last_change = ps.last_change;
stats.last_peered = ps.last_peered;
stats.last_clean = ps.last_clean;
stats.last_unstale = ps.last_unstale;
stats.last_undegraded = ps.last_undegraded;
stats.last_fullsized = ps.last_fullsized;
stats.last_scrub_stamp = ps.last_scrub_stamp;
stats.last_deep_scrub_stamp = ps.last_deep_scrub_stamp;
stats.last_clean_scrub_stamp = ps.last_clean_scrub_stamp;
} else {
utime_t now = ceph_clock_now(g_ceph_context);
stats.last_fresh = now;
stats.last_active = now;
stats.last_change = now;
stats.last_peered = now;
stats.last_clean = now;
stats.last_unstale = now;
stats.last_undegraded = now;
stats.last_fullsized = now;
stats.last_scrub_stamp = now;
stats.last_deep_scrub_stamp = now;
stats.last_clean_scrub_stamp = now;
}
osdmap->pg_to_up_acting_osds(
pgid,
&stats.up,
&stats.up_primary,
&stats.acting,
&stats.acting_primary);
if (split_bits == 0) {
dout(10) << __func__ << " will create " << pgid
<< " primary " << stats.acting_primary
<< " acting " << stats.acting
<< dendl;
} else {
dout(10) << __func__ << " will create " << pgid
<< " primary " << stats.acting_primary
<< " acting " << stats.acting
<< " parent " << parent
<< " by " << split_bits << " bits"
<< dendl;
}
}
void PGMonitor::register_new_pgs()
{
// iterate over crush mapspace
OSDMap *osdmap = &mon->osdmon()->osdmap;
epoch_t epoch = osdmap->get_epoch();
dout(10) << __func__ << " checking pg pools for osdmap epoch " << epoch
<< ", last_pg_scan " << pg_map.last_pg_scan << dendl;
int created = 0;
for (map<int64_t,pg_pool_t>::iterator p = osdmap->pools.begin();
p != osdmap->pools.end();
++p) {
int64_t poolid = p->first;
pg_pool_t &pool = p->second;
int ruleno = osdmap->crush->find_rule(pool.get_crush_ruleset(),
pool.get_type(), pool.get_size());
if (ruleno < 0 || !osdmap->crush->rule_exists(ruleno))
continue;
if (pool.get_last_change() <= pg_map.last_pg_scan ||
pool.get_last_change() <= pending_inc.pg_scan) {
dout(10) << " no change in pool " << p->first << " " << pool << dendl;
continue;
}
dout(10) << __func__ << " scanning pool " << p->first
<< " " << pool << dendl;
// first pgs in this pool
bool new_pool = pg_map.pg_pool_sum.count(poolid) == 0;
for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) {
pg_t pgid(ps, poolid, -1);
if (pg_map.pg_stat.count(pgid)) {
dout(20) << "register_new_pgs have " << pgid << dendl;
continue;
}
created++;
register_pg(osdmap, pool, pgid, pool.get_last_change(), new_pool);
}
}
int removed = 0;
for (set<pg_t>::iterator p = pg_map.creating_pgs.begin();
p != pg_map.creating_pgs.end();
++p) {
if (p->preferred() >= 0) {
dout(20) << " removing creating_pg " << *p
<< " because it is localized and obsolete" << dendl;
pending_inc.pg_remove.insert(*p);
removed++;
}
if (!osdmap->have_pg_pool(p->pool())) {
dout(20) << " removing creating_pg " << *p
<< " because containing pool deleted" << dendl;
pending_inc.pg_remove.insert(*p);
++removed;
}
}
// deleted pools?
for (ceph::unordered_map<pg_t,pg_stat_t>::const_iterator p =
pg_map.pg_stat.begin();
p != pg_map.pg_stat.end(); ++p) {
if (!osdmap->have_pg_pool(p->first.pool())) {
dout(20) << " removing pg_stat " << p->first << " because "
<< "containing pool deleted" << dendl;
pending_inc.pg_remove.insert(p->first);
++removed;
}
if (p->first.preferred() >= 0) {
dout(20) << " removing localized pg " << p->first << dendl;
pending_inc.pg_remove.insert(p->first);
++removed;
}
}
// we don't want to redo this work if we can avoid it.
pending_inc.pg_scan = epoch;
dout(10) << "register_new_pgs registered " << created << " new pgs, removed "
<< removed << " uncreated pgs" << dendl;
}
void PGMonitor::map_pg_creates()
{
OSDMap *osdmap = &mon->osdmon()->osdmap;
dout(10) << __func__ << " to " << pg_map.creating_pgs.size()
<< " pgs, osdmap epoch " << osdmap->get_epoch()
<< dendl;
unsigned changed = 0;
for (set<pg_t>::const_iterator p = pg_map.creating_pgs.begin();
p != pg_map.creating_pgs.end();
++p) {
pg_t pgid = *p;
pg_t on = pgid;
ceph::unordered_map<pg_t,pg_stat_t>::const_iterator q =
pg_map.pg_stat.find(pgid);
assert(q != pg_map.pg_stat.end());
const pg_stat_t *s = &q->second;
if (s->parent_split_bits)
on = s->parent;
vector<int> up, acting;
int up_primary, acting_primary;
osdmap->pg_to_up_acting_osds(
on,
&up,
&up_primary,
&acting,
&acting_primary);
if (up != s->up ||
up_primary != s->up_primary ||
acting != s->acting ||
acting_primary != s->acting_primary) {
pg_stat_t *ns = &pending_inc.pg_stat_updates[pgid];
if (osdmap->get_epoch() > ns->reported_epoch) {
dout(20) << __func__ << " " << pgid << " "
<< " acting_primary: " << s->acting_primary
<< " -> " << acting_primary
<< " acting: " << s->acting << " -> " << acting
<< " up_primary: " << s->up_primary << " -> " << up_primary
<< " up: " << s->up << " -> " << up
<< dendl;
// only initialize if it wasn't already a pending update
if (ns->reported_epoch == 0)
*ns = *s;
// note epoch if the target of the create message changed
if (acting_primary != ns->acting_primary)
ns->mapping_epoch = osdmap->get_epoch();
ns->up = up;
ns->up_primary = up_primary;
ns->acting = acting;
ns->acting_primary = acting_primary;
++changed;
} else {
dout(20) << __func__ << " " << pgid << " has pending update from newer"
<< " epoch " << ns->reported_epoch
<< dendl;
}
}
}
if (changed) {
dout(10) << __func__ << " " << changed << " pgs changed primary" << dendl;
}
}
void PGMonitor::send_pg_creates()
{
// We only need to do this old, spammy way of broadcasting create messages

View File

@ -91,19 +91,8 @@ private:
// when we last received PG stats from each osd
map<int,utime_t> last_osd_report;
void register_pg(OSDMap *osdmap, pg_pool_t& pool, pg_t pgid,
epoch_t epoch, bool new_pool);
/**
* check latest osdmap for new pgs to register
*/
void register_new_pgs();
/**
* recalculate creating pg mappings
*/
void map_pg_creates();
void send_pg_creates();
epoch_t send_pg_creates(int osd, Connection *con, epoch_t next);

View File

@ -256,8 +256,7 @@ private:
friend class OSDMonitor;
// FIXME: the elements required for PGMap updates should be exposed properly
friend class PGMonitor;
friend class ClusterState;
friend class PGMapUpdater;
public:
OSDMap() : epoch(0),