OSD: resurrect a parent if it splits into the pg we want to create

When attempting to create a new pg object in response to a
peering message, there are 3 cases:
1) That pg is currently being deleted.  In this case, we
cancel the deletion and resurrect the pg at the epoch at
which it had been deleted.
2) A pg is being deleted which, had it not been deleted, would
have split into the pg we want to create.  In that case, we
resurrect that pg at the map epoch at which it had been deleted
and let the request wait on the impending split.
3) Neither that pg nor a parent can be resurrected.  In this
case, we create a new pg at the map epoch of the peering
request.

Fixes: #5154
Signed-off-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
Samuel Just 2013-05-30 15:11:58 -07:00
parent d605eafd17
commit 951fc2fae4
2 changed files with 172 additions and 38 deletions

View File

@ -1639,9 +1639,68 @@ void OSD::add_newly_split_pg(PG *pg, PG::RecoveryCtx *rctx)
_remove_pg(pg);
}
/**
 * Check whether pgid (or an ancestor that would have split into pgid)
 * is currently mid-deletion, and if so try to halt that deletion so the
 * pg can be resurrected instead of recreated from scratch.
 *
 * @param curmap       current OSDMap (used to test for splits up to now)
 * @param pgid         pg we want to create
 * @param resurrected  [out] pg actually resurrected (pgid or an ancestor)
 * @param old_pg_state [out] the deleting PG whose state we are reviving
 * @return RES_SELF if pgid itself was resurrected, RES_PARENT if an
 *         ancestor that splits into pgid was resurrected, RES_NONE if
 *         nothing relevant is deleting (caller creates a fresh pg).
 */
OSD::res_result OSD::_try_resurrect_pg(
  OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state)
{
  assert(resurrected);
  assert(old_pg_state);
  // find nearest ancestor: walk from pgid up the split hierarchy looking
  // for a pg that is currently being deleted
  DeletingStateRef df;
  pg_t cur(pgid);
  while (true) {
    // look up the ancestor under consideration (cur), not the original
    // pgid -- otherwise the walk could never find a deleting parent
    df = service.deleting_pgs.lookup(cur);
    if (df)
      break;
    if (!cur.ps())
      break; // reached the root of the split hierarchy
    cur = cur.get_parent();
  }
  if (!df)
    return RES_NONE; // good to go

  // the map at which the old pg was deleted bounds the split check below
  df->old_pg_state->lock();
  OSDMapRef create_map = df->old_pg_state->get_osdmap();
  df->old_pg_state->unlock();

  set<pg_t> children;
  if (cur == pgid) {
    // pgid itself is mid-deletion; try to halt the deletion and reuse it
    if (df->try_stop_deletion()) {
      dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
      *resurrected = cur;
      *old_pg_state = df->old_pg_state;
      service.deleting_pgs.remove(pgid); // PG is no longer being removed!
      return RES_SELF;
    } else {
      // raced, ensure we don't see DeletingStateRef when we try to
      // delete this pg
      service.deleting_pgs.remove(pgid);
      return RES_NONE;
    }
  } else if (cur.is_split(create_map->get_pg_num(cur.pool()),
			  curmap->get_pg_num(cur.pool()),
			  &children) &&
	     children.count(pgid)) {
    // an ancestor is mid-deletion and, between the map at which it was
    // deleted and the current map, would have split into pgid
    if (df->try_stop_deletion()) {
      // report and un-register the ancestor (cur), not the child pgid
      dout(10) << __func__ << ": halted deletion on ancestor pg " << cur
	       << dendl;
      *resurrected = cur;
      *old_pg_state = df->old_pg_state;
      service.deleting_pgs.remove(cur); // PG is no longer being removed!
      return RES_PARENT;
    } else {
      /* this is not a problem, failing to cancel proves that all objects
       * have been removed, so no hobject_t overlap is possible
       */
      return RES_NONE;
    }
  }
  return RES_NONE;
}
PG *OSD::_create_lock_pg(
OSDMapRef createmap,
pg_t pgid, bool newly_created, bool hold_map_lock,
pg_t pgid,
bool newly_created,
bool hold_map_lock,
bool backfill,
int role, vector<int>& up, vector<int>& acting, pg_history_t history,
pg_interval_map_t& pi,
ObjectStore::Transaction& t)
@ -1651,22 +1710,7 @@ PG *OSD::_create_lock_pg(
PG *pg = _open_lock_pg(createmap, pgid, true, hold_map_lock);
DeletingStateRef df = service.deleting_pgs.lookup(pgid);
bool backfill = false;
if (df && df->try_stop_deletion()) {
dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
backfill = true;
service.deleting_pgs.remove(pgid); // PG is no longer being removed!
} else {
if (df) {
// raced, ensure we don't see DeletingStateRef when we try to
// delete this pg
service.deleting_pgs.remove(pgid);
}
// either it's not deleting, or we failed to get to it in time
t.create_collection(coll_t(pgid));
}
service.init_splits_between(pgid, pg->get_osdmap(), service.get_osdmap());
pg->init(role, up, acting, history, pi, backfill, &t);
@ -1980,8 +2024,6 @@ void OSD::handle_pg_peering_evt(
return;
}
PG *pg;
if (!_have_pg(info.pgid)) {
// same primary?
if (!osdmap->have_pg_pool(info.pgid.pool()))
@ -2028,24 +2070,104 @@ void OSD::handle_pg_peering_evt(
assert(!info.dne()); // and pg exists if we are hearing about it
}
// ok, create PG locally using provided Info and History
// do we need to resurrect a deleting pg?
pg_t resurrected;
PGRef old_pg_state;
res_result result = _try_resurrect_pg(
service.get_osdmap(),
info.pgid,
&resurrected,
&old_pg_state);
PG::RecoveryCtx rctx = create_context();
pg = _create_lock_pg(
get_map(epoch),
info.pgid, create, false, role, up, acting, history, pi,
*rctx.transaction);
pg->handle_create(&rctx);
pg->write_if_dirty(*rctx.transaction);
dispatch_context(rctx, pg, osdmap);
switch (result) {
case RES_NONE: {
// ok, create the pg locally using provided Info and History
rctx.transaction->create_collection(coll_t(info.pgid));
PG *pg = _create_lock_pg(
get_map(epoch),
info.pgid, create, false, result == RES_SELF,
role, up, acting, history, pi,
*rctx.transaction);
pg->handle_create(&rctx);
pg->write_if_dirty(*rctx.transaction);
dispatch_context(rctx, pg, osdmap);
dout(10) << *pg << " is new" << dendl;
// kick any waiters
wake_pg_waiters(pg->info.pgid);
dout(10) << *pg << " is new" << dendl;
pg->queue_peering_event(evt);
pg->unlock();
return;
}
case RES_SELF: {
old_pg_state->lock();
PG *pg = _create_lock_pg(
old_pg_state->get_osdmap(),
resurrected,
false,
false,
true,
old_pg_state->role,
old_pg_state->up,
old_pg_state->acting,
old_pg_state->info.history,
old_pg_state->past_intervals,
*rctx.transaction);
old_pg_state->unlock();
pg->handle_create(&rctx);
pg->write_if_dirty(*rctx.transaction);
dispatch_context(rctx, pg, osdmap);
// kick any waiters
wake_pg_waiters(pg->info.pgid);
dout(10) << *pg << " is new (resurrected)" << dendl;
// kick any waiters
wake_pg_waiters(pg->info.pgid);
pg->queue_peering_event(evt);
pg->unlock();
return;
}
case RES_PARENT: {
assert(old_pg_state);
old_pg_state->lock();
PG *parent = _create_lock_pg(
old_pg_state->get_osdmap(),
resurrected,
false,
false,
true,
old_pg_state->role,
old_pg_state->up,
old_pg_state->acting,
old_pg_state->info.history,
old_pg_state->past_intervals,
*rctx.transaction
);
old_pg_state->unlock();
parent->handle_create(&rctx);
parent->write_if_dirty(*rctx.transaction);
dispatch_context(rctx, parent, osdmap);
dout(10) << *parent << " is new" << dendl;
// kick any waiters
wake_pg_waiters(parent->info.pgid);
assert(service.splitting(info.pgid));
peering_wait_for_split[info.pgid].push_back(evt);
//parent->queue_peering_event(evt);
parent->queue_null(osdmap->get_epoch(), osdmap->get_epoch());
parent->unlock();
return;
}
}
} else {
// already had it. did the mapping change?
pg = _lookup_lock_pg(info.pgid);
PG *pg = _lookup_lock_pg(info.pgid);
if (epoch < pg->info.history.same_interval_since) {
dout(10) << *pg << " get_or_create_pg acting changed in "
<< pg->info.history.same_interval_since
@ -2053,10 +2175,10 @@ void OSD::handle_pg_peering_evt(
pg->unlock();
return;
}
pg->queue_peering_event(evt);
pg->unlock();
return;
}
pg->queue_peering_event(evt);
pg->unlock();
}
@ -5391,10 +5513,11 @@ void OSD::handle_pg_create(OpRequestRef op)
if (can_create_pg(pgid)) {
pg_interval_map_t pi;
pg = _create_lock_pg(
osdmap, pgid, true, false,
osdmap, pgid, true, false, false,
0, creating_pgs[pgid].acting, creating_pgs[pgid].acting,
history, pi,
*rctx.transaction);
rctx.transaction->create_collection(coll_t(pgid));
pg->info.last_epoch_started = pg->info.history.last_epoch_started;
creating_pgs.erase(pgid);
wake_pg_waiters(pg->info.pgid);

View File

@ -1049,10 +1049,21 @@ protected:
PG *_open_lock_pg(OSDMapRef createmap,
pg_t pg, bool no_lockdep_check=false,
bool hold_map_lock=false);
enum res_result {
RES_PARENT, // resurrected a parent
RES_SELF, // resurrected self
RES_NONE // nothing relevant deleting
};
res_result _try_resurrect_pg(
OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state);
PG *_create_lock_pg(OSDMapRef createmap,
pg_t pgid, bool newly_created,
bool hold_map_lock, int role,
vector<int>& up, vector<int>& acting,
pg_t pgid,
bool newly_created,
bool hold_map_lock,
bool backfill,
int role,
vector<int>& up,
vector<int>& acting,
pg_history_t history,
pg_interval_map_t& pi,
ObjectStore::Transaction& t);