Merge pull request #8033 from liewegas/wip-pg-create

osd: handle dup pg_create that races with pg deletion

Reviewed-by: Samuel Just <sjust@redhat.com>
commit 17f810573c
Author: Sage Weil
Date: 2016-03-15 08:35:36 -04:00
5 changed files with 72 additions and 69 deletions
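
For context on what the reworked flow checks: the diff below routes pg_create through handle_pg_peering_evt with a new same_primary flag, so a duplicate create that races with PG deletion is dropped when the primary has since changed. The following is a minimal, self-contained sketch (not Ceph code) of that staleness test; history_t, event_is_stale, and the integer epoch_t are simplified stand-ins for pg_history_t and the checks against same_primary_since / same_interval_since shown in the diff.

// Minimal standalone sketch (not Ceph code) of the stale-event check that the
// reworked OSD::handle_pg_peering_evt performs.
#include <cstdint>
#include <iostream>

using epoch_t = std::uint32_t;

struct history_t {
  epoch_t same_interval_since = 0;  // epoch when the up/acting set last changed
  epoch_t same_primary_since = 0;   // epoch when the primary last changed
};

// Returns true if a peering event sent at msg_epoch should be dropped as stale.
// A pg_create from the monitor only requires the primary to be unchanged
// (same_primary == true); notify/log/info events require the whole interval.
bool event_is_stale(const history_t& h, epoch_t msg_epoch, bool same_primary) {
  if (same_primary && msg_epoch < h.same_primary_since)
    return true;   // primary changed after the message was sent
  if (!same_primary && msg_epoch < h.same_interval_since)
    return true;   // acting set changed after the message was sent
  return false;
}

int main() {
  history_t h;
  h.same_interval_since = 120;
  h.same_primary_since = 150;

  // A duplicate pg_create sent at epoch 140: the primary moved at epoch 150,
  // so this OSD is no longer responsible for the create and drops it.
  std::cout << event_is_stale(h, 140, /*same_primary=*/true) << "\n";   // prints 1

  // A pg_notify sent at epoch 140: the interval last changed at 120,
  // so the event still applies and is processed.
  std::cout << event_is_stale(h, 140, /*same_primary=*/false) << "\n";  // prints 0
  return 0;
}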

diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc

@@ -459,7 +459,7 @@ void PGMap::remove_osd(int osd)
   }
 }
 
-void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating,
+void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
                         bool sameosds)
 {
   pg_pool_sum[pgid.pool()].add(s);
@@ -468,12 +468,10 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating,
   num_pg++;
   num_pg_by_state[s.state]++;
 
-  if (!nocreating) {
-    if (s.state & PG_STATE_CREATING) {
-      creating_pgs.insert(pgid);
-      if (s.acting_primary >= 0) {
-        creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid);
-      }
+  if (s.state & PG_STATE_CREATING) {
+    creating_pgs.insert(pgid);
+    if (s.acting_primary >= 0) {
+      creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid);
     }
   }
 
@@ -492,7 +490,7 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating,
     pg_by_osd[*p].insert(pgid);
 }
 
-void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool nocreating,
+void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
                         bool sameosds)
 {
   pool_stat_t& ps = pg_pool_sum[pgid.pool()];
@@ -507,17 +505,15 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool nocreating,
   if (end == 0)
     num_pg_by_state.erase(s.state);
 
-  if (!nocreating) {
-    if (s.state & PG_STATE_CREATING) {
-      creating_pgs.erase(pgid);
-      if (s.acting_primary >= 0) {
-        map<epoch_t,set<pg_t> >& r = creating_pgs_by_osd_epoch[s.acting_primary];
-        r[s.mapping_epoch].erase(pgid);
-        if (r[s.mapping_epoch].empty())
-          r.erase(s.mapping_epoch);
-        if (r.empty())
-          creating_pgs_by_osd_epoch.erase(s.acting_primary);
-      }
+  if (s.state & PG_STATE_CREATING) {
+    creating_pgs.erase(pgid);
+    if (s.acting_primary >= 0) {
+      map<epoch_t,set<pg_t> >& r = creating_pgs_by_osd_epoch[s.acting_primary];
+      r[s.mapping_epoch].erase(pgid);
+      if (r[s.mapping_epoch].empty())
+        r.erase(s.mapping_epoch);
+      if (r.empty())
+        creating_pgs_by_osd_epoch.erase(s.acting_primary);
     }
   }
 
@@ -559,9 +555,9 @@ void PGMap::stat_pg_update(const pg_t pgid, pg_stat_t& s,
     s.up == n.up &&
     s.blocked_by == n.blocked_by;
 
-  stat_pg_sub(pgid, s, false, sameosds);
+  stat_pg_sub(pgid, s, sameosds);
   s = n;
-  stat_pg_add(pgid, n, false, sameosds);
+  stat_pg_add(pgid, n, sameosds);
 }
 
 void PGMap::stat_osd_add(const osd_stat_t &s)

diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h

@@ -258,9 +258,9 @@ public:
   void redo_full_sets();
   void register_nearfull_status(int osd, const osd_stat_t& s);
   void calc_stats();
-  void stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool nocreating=false,
+  void stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
                    bool sameosds=false);
-  void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool nocreating=false,
+  void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
                    bool sameosds=false);
   void stat_pg_update(const pg_t pgid, pg_stat_t &prev, bufferlist::iterator& blp);
   void stat_osd_add(const osd_stat_t &s);

diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc

@@ -440,16 +440,19 @@ void PGMonitor::apply_pgmap_delta(bufferlist& bl)
       r = -ENOENT;
     } else {
       r = mon->store->get(pgmap_pg_prefix, stringify(pgid), pgbl);
-      dout(20) << " refreshing pg " << pgid << " got " << r << " len "
-               << pgbl.length() << dendl;
       if (pg_pool_sum_old.count(pgid.pool()) == 0)
         pg_pool_sum_old[pgid.pool()] = pg_map.pg_pool_sum[pgid.pool()];
     }
 
     if (r >= 0) {
       pg_map.update_pg(pgid, pgbl);
+      dout(20) << " refreshing pg " << pgid
+               << " " << pg_map.pg_stat[pgid].reported_epoch
+               << ":" << pg_map.pg_stat[pgid].reported_seq
+               << " " << pg_state_string(pg_map.pg_stat[pgid].state)
+               << dendl;
     } else {
       dout(20) << " removing pg " << pgid << dendl;
       pg_map.remove_pg(pgid);
       if (pgid.ps() == 0)
         deleted_pools.insert(pgid.pool());

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

@@ -3402,11 +3402,10 @@ void OSD::build_past_intervals_parallel()
  */
 void OSD::handle_pg_peering_evt(
   spg_t pgid,
-  const pg_info_t& info,
+  const pg_history_t& orig_history,
   pg_interval_map_t& pi,
   epoch_t epoch,
-  pg_shard_t from,
-  bool primary,
+  bool same_primary,
   PG::CephPeeringEvtRef evt)
 {
   if (service.splitting(pgid)) {
@@ -3424,11 +3423,18 @@ void OSD::handle_pg_peering_evt(
       pgid.pgid, &up, &up_primary, &acting, &acting_primary);
     int role = osdmap->calc_pg_role(whoami, acting, acting.size());
 
-    pg_history_t history = info.history;
+    pg_history_t history = orig_history;
     bool valid_history = project_pg_history(
       pgid, history, epoch, up, up_primary, acting, acting_primary);
 
-    if (!valid_history || epoch < history.same_interval_since) {
+    if (same_primary && epoch < history.same_primary_since) {
+      dout(10) << "get_or_create_pg " << pgid << " primary changed in "
+               << history.same_primary_since << " (msg from " << epoch << ")"
+               << dendl;
+      return;
+    }
+    if (!valid_history ||
+        (!same_primary && epoch < history.same_interval_since)) {
       dout(10) << "get_or_create_pg " << pgid << " acting changed in "
                << history.same_interval_since << " (msg from " << epoch << ")"
               << dendl;
@@ -3554,7 +3560,15 @@ void OSD::handle_pg_peering_evt(
   } else {
     // already had it. did the mapping change?
     PG *pg = _lookup_lock_pg(pgid);
-    if (epoch < pg->info.history.same_interval_since) {
+    if (same_primary && epoch < pg->info.history.same_primary_since) {
+      dout(10) << "get_or_create_pg " << pgid << " primary changed in "
+               << pg->info.history.same_primary_since
+               << " (msg from " << epoch << ")"
+               << dendl;
+      pg->unlock();
+      return;
+    }
+    if (!same_primary && epoch < pg->info.history.same_interval_since) {
       dout(10) << *pg << " get_or_create_pg acting changed in "
                << pg->info.history.same_interval_since
                << " (msg from " << epoch << ")" << dendl;
@@ -7414,42 +7428,32 @@ void OSD::handle_pg_create(OpRequestRef op)
     bool mapped = osdmap->get_primary_shard(on, &pgid);
     assert(mapped);
 
-    // does it already exist?
-    if (_have_pg(pgid)) {
-      dout(10) << "mkpg " << pgid << " already exists, skipping" << dendl;
-      continue;
-    }
-
+    pg_interval_map_t pi;
     pg_history_t history;
     history.epoch_created = created;
     history.last_scrub_stamp = ci->second;
     history.last_deep_scrub_stamp = ci->second;
+
+    // project history from created epoch (handle_pg_peering_evt does
+    // it from msg send epoch)
     bool valid_history = project_pg_history(
       pgid, history, created, up, up_primary, acting, acting_primary);
-    /* the pg creation message must have come from a mon and therefore
-     * cannot be on the other side of a map gap
-     */
+    // the pg creation message must have come from a mon and therefore
+    // cannot be on the other side of a map gap
    assert(valid_history);
 
-    PG::RecoveryCtx rctx = create_context();
-    const pg_pool_t* pp = osdmap->get_pg_pool(pgid.pool());
-    PG::_create(*rctx.transaction, pgid, pgid.get_split_bits(pp->get_pg_num()));
-    PG::_init(*rctx.transaction, pgid, pp);
-
-    pg_interval_map_t pi;
-    PG *pg = _create_lock_pg(
-      osdmap, pgid, false, false,
-      0, up, up_primary,
-      acting, acting_primary,
-      history, pi,
-      *rctx.transaction);
-    pg->info.last_epoch_started = created;
-    pg->handle_create(&rctx);
-    pg->write_if_dirty(*rctx.transaction);
-    pg->publish_stats_to_osd();
-    pg->unlock();
-    wake_pg_waiters(pgid);
-    dispatch_context(rctx, pg, osdmap);
+    handle_pg_peering_evt(
+      pgid,
+      history,
+      pi,
+      m->epoch,
+      true, // same primary, bc this is a create
+      PG::CephPeeringEvtRef(
+        new PG::CephPeeringEvt(
+          m->epoch,
+          m->epoch,
+          PG::NullEvt()))
+      );
   }
 
   last_pg_create_epoch = m->epoch;
@@ -7674,8 +7678,9 @@ void OSD::handle_pg_notify(OpRequestRef op)
 
     handle_pg_peering_evt(
       spg_t(it->first.info.pgid.pgid, it->first.to),
-      it->first.info, it->second,
-      it->first.query_epoch, pg_shard_t(from, it->first.from), true,
+      it->first.info.history, it->second,
+      it->first.query_epoch,
+      false, // same interval
       PG::CephPeeringEvtRef(
        new PG::CephPeeringEvt(
          it->first.epoch_sent, it->first.query_epoch,
@@ -7706,8 +7711,8 @@ void OSD::handle_pg_log(OpRequestRef op)
   op->mark_started();
   handle_pg_peering_evt(
     spg_t(m->info.pgid.pgid, m->to),
-    m->info, m->past_intervals, m->get_epoch(),
-    pg_shard_t(from, m->from), false,
+    m->info.history, m->past_intervals, m->get_epoch(),
+    false, // same interval
     PG::CephPeeringEvtRef(
      new PG::CephPeeringEvt(
        m->get_epoch(), m->get_query_epoch(),
@@ -7740,8 +7745,8 @@ void OSD::handle_pg_info(OpRequestRef op)
 
     handle_pg_peering_evt(
       spg_t(p->first.info.pgid.pgid, p->first.to),
-      p->first.info, p->second, p->first.epoch_sent,
-      pg_shard_t(from, p->first.from), false,
+      p->first.info.history, p->second, p->first.epoch_sent,
+      false, // same interval
       PG::CephPeeringEvtRef(
        new PG::CephPeeringEvt(
          p->first.epoch_sent, p->first.query_epoch,

diff --git a/src/osd/OSD.h b/src/osd/OSD.h

@@ -1961,11 +1961,10 @@ protected:
 
   void handle_pg_peering_evt(
     spg_t pgid,
-    const pg_info_t& info,
+    const pg_history_t& orig_history,
     pg_interval_map_t& pi,
    epoch_t epoch,
-    pg_shard_t from,
-    bool primary,
+    bool same_primary,
     PG::CephPeeringEvtRef evt);
   void load_pgs();