mirror of
https://github.com/ceph/ceph
synced 2025-02-21 18:17:42 +00:00
OSD: resurrect a parent if it splits into the pg we want to create
When attempting to create a new pg object in response to a peering message, there are 3 cases:
1) That pg is currently being deleted. In this case, we cancel the deletion and resurrect the pg at the epoch at which it had been deleted.
2) A pg is being deleted which would have split into the pg we want to create had it not been deleted. In that case, we resurrect that pg at the map at which it had been deleted and let the request wait on the impending split.
3) Neither that pg nor a parent can be resurrected. In this case, we create a new pg at the map epoch of the peering request.
Fixes: #5154
Signed-off-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
parent
d605eafd17
commit
951fc2fae4
193
src/osd/OSD.cc
193
src/osd/OSD.cc
@ -1639,9 +1639,68 @@ void OSD::add_newly_split_pg(PG *pg, PG::RecoveryCtx *rctx)
|
||||
_remove_pg(pg);
|
||||
}
|
||||
|
||||
/**
 * Try to halt the pending deletion of a pg that can back the pg we are
 * about to instantiate.
 *
 * Starting at pgid and walking up through get_parent(), find the nearest
 * pg (pgid itself included) that has an entry in service.deleting_pgs.
 * If that pg is pgid, or is an ancestor that splits into pgid between the
 * map it was deleted at and curmap, attempt to cancel its deletion.
 *
 * @param curmap       map whose pg_num is used as the split target
 * @param pgid         pg the caller wants to create
 * @param resurrected  [out] pg whose deletion was halted; written only
 *                     when RES_SELF or RES_PARENT is returned
 * @param old_pg_state [out] the PG object recorded in the deleting state;
 *                     written only when RES_SELF or RES_PARENT is returned
 * @return RES_SELF if deletion of pgid itself was halted, RES_PARENT if
 *         deletion of a splitting ancestor was halted, RES_NONE otherwise
 *         (nothing relevant is deleting, or the cancel attempt failed).
 */
OSD::res_result OSD::_try_resurrect_pg(
  OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state)
{
  assert(resurrected);
  assert(old_pg_state);

  // find nearest ancestor (pgid included) that is currently being deleted.
  // The lookup must use cur, not pgid, or the walk can never find a parent;
  // and the pg with ps() == 0 must be checked too before giving up.
  DeletingStateRef df;
  pg_t cur(pgid);
  while (true) {
    df = service.deleting_pgs.lookup(cur);
    if (df)
      break;
    if (!cur.ps())
      break;  // the walk ends at ps() == 0; nothing further up to try
    cur = cur.get_parent();
  }
  if (!df)
    return RES_NONE; // good to go

  // map the deleting pg was at when it was queued for removal
  df->old_pg_state->lock();
  OSDMapRef create_map = df->old_pg_state->get_osdmap();
  df->old_pg_state->unlock();

  set<pg_t> children;
  if (cur == pgid) {
    if (df->try_stop_deletion()) {
      dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
      *resurrected = cur;
      *old_pg_state = df->old_pg_state;
      service.deleting_pgs.remove(pgid); // PG is no longer being removed!
      return RES_SELF;
    } else {
      // raced, ensure we don't see DeletingStateRef when we try to
      // delete this pg
      service.deleting_pgs.remove(pgid);
      return RES_NONE;
    }
  } else if (cur.is_split(create_map->get_pg_num(cur.pool()),
			  curmap->get_pg_num(cur.pool()),
			  &children) &&
	     children.count(pgid)) {
    if (df->try_stop_deletion()) {
      dout(10) << __func__ << ": halted deletion on ancestor pg " << cur
	       << " of " << pgid << dendl;
      *resurrected = cur;
      *old_pg_state = df->old_pg_state;
      // the deleting entry belongs to the ancestor, so drop that one
      service.deleting_pgs.remove(cur); // PG is no longer being removed!
      return RES_PARENT;
    } else {
      /* this is not a problem, failing to cancel proves that all objects
       * have been removed, so no hobject_t overlap is possible
       */
      return RES_NONE;
    }
  }
  // a deleting ancestor exists but does not split into pgid
  return RES_NONE;
}
|
||||
|
||||
PG *OSD::_create_lock_pg(
|
||||
OSDMapRef createmap,
|
||||
pg_t pgid, bool newly_created, bool hold_map_lock,
|
||||
pg_t pgid,
|
||||
bool newly_created,
|
||||
bool hold_map_lock,
|
||||
bool backfill,
|
||||
int role, vector<int>& up, vector<int>& acting, pg_history_t history,
|
||||
pg_interval_map_t& pi,
|
||||
ObjectStore::Transaction& t)
|
||||
@ -1651,22 +1710,7 @@ PG *OSD::_create_lock_pg(
|
||||
|
||||
PG *pg = _open_lock_pg(createmap, pgid, true, hold_map_lock);
|
||||
|
||||
DeletingStateRef df = service.deleting_pgs.lookup(pgid);
|
||||
bool backfill = false;
|
||||
|
||||
if (df && df->try_stop_deletion()) {
|
||||
dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
|
||||
backfill = true;
|
||||
service.deleting_pgs.remove(pgid); // PG is no longer being removed!
|
||||
} else {
|
||||
if (df) {
|
||||
// raced, ensure we don't see DeletingStateRef when we try to
|
||||
// delete this pg
|
||||
service.deleting_pgs.remove(pgid);
|
||||
}
|
||||
// either it's not deleting, or we failed to get to it in time
|
||||
t.create_collection(coll_t(pgid));
|
||||
}
|
||||
service.init_splits_between(pgid, pg->get_osdmap(), service.get_osdmap());
|
||||
|
||||
pg->init(role, up, acting, history, pi, backfill, &t);
|
||||
|
||||
@ -1980,8 +2024,6 @@ void OSD::handle_pg_peering_evt(
|
||||
return;
|
||||
}
|
||||
|
||||
PG *pg;
|
||||
|
||||
if (!_have_pg(info.pgid)) {
|
||||
// same primary?
|
||||
if (!osdmap->have_pg_pool(info.pgid.pool()))
|
||||
@ -2028,24 +2070,104 @@ void OSD::handle_pg_peering_evt(
|
||||
assert(!info.dne()); // and pg exists if we are hearing about it
|
||||
}
|
||||
|
||||
// ok, create PG locally using provided Info and History
|
||||
// do we need to resurrect a deleting pg?
|
||||
pg_t resurrected;
|
||||
PGRef old_pg_state;
|
||||
res_result result = _try_resurrect_pg(
|
||||
service.get_osdmap(),
|
||||
info.pgid,
|
||||
&resurrected,
|
||||
&old_pg_state);
|
||||
|
||||
PG::RecoveryCtx rctx = create_context();
|
||||
pg = _create_lock_pg(
|
||||
get_map(epoch),
|
||||
info.pgid, create, false, role, up, acting, history, pi,
|
||||
*rctx.transaction);
|
||||
pg->handle_create(&rctx);
|
||||
pg->write_if_dirty(*rctx.transaction);
|
||||
dispatch_context(rctx, pg, osdmap);
|
||||
switch (result) {
|
||||
case RES_NONE: {
|
||||
// ok, create the pg locally using provided Info and History
|
||||
rctx.transaction->create_collection(coll_t(info.pgid));
|
||||
PG *pg = _create_lock_pg(
|
||||
get_map(epoch),
|
||||
info.pgid, create, false, result == RES_SELF,
|
||||
role, up, acting, history, pi,
|
||||
*rctx.transaction);
|
||||
pg->handle_create(&rctx);
|
||||
pg->write_if_dirty(*rctx.transaction);
|
||||
dispatch_context(rctx, pg, osdmap);
|
||||
|
||||
dout(10) << *pg << " is new" << dendl;
|
||||
|
||||
// kick any waiters
|
||||
wake_pg_waiters(pg->info.pgid);
|
||||
|
||||
dout(10) << *pg << " is new" << dendl;
|
||||
pg->queue_peering_event(evt);
|
||||
pg->unlock();
|
||||
return;
|
||||
}
|
||||
case RES_SELF: {
|
||||
old_pg_state->lock();
|
||||
PG *pg = _create_lock_pg(
|
||||
old_pg_state->get_osdmap(),
|
||||
resurrected,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
old_pg_state->role,
|
||||
old_pg_state->up,
|
||||
old_pg_state->acting,
|
||||
old_pg_state->info.history,
|
||||
old_pg_state->past_intervals,
|
||||
*rctx.transaction);
|
||||
old_pg_state->unlock();
|
||||
pg->handle_create(&rctx);
|
||||
pg->write_if_dirty(*rctx.transaction);
|
||||
dispatch_context(rctx, pg, osdmap);
|
||||
|
||||
// kick any waiters
|
||||
wake_pg_waiters(pg->info.pgid);
|
||||
dout(10) << *pg << " is new (resurrected)" << dendl;
|
||||
|
||||
// kick any waiters
|
||||
wake_pg_waiters(pg->info.pgid);
|
||||
|
||||
pg->queue_peering_event(evt);
|
||||
pg->unlock();
|
||||
return;
|
||||
}
|
||||
case RES_PARENT: {
|
||||
assert(old_pg_state);
|
||||
old_pg_state->lock();
|
||||
PG *parent = _create_lock_pg(
|
||||
old_pg_state->get_osdmap(),
|
||||
resurrected,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
old_pg_state->role,
|
||||
old_pg_state->up,
|
||||
old_pg_state->acting,
|
||||
old_pg_state->info.history,
|
||||
old_pg_state->past_intervals,
|
||||
*rctx.transaction
|
||||
);
|
||||
old_pg_state->unlock();
|
||||
parent->handle_create(&rctx);
|
||||
parent->write_if_dirty(*rctx.transaction);
|
||||
dispatch_context(rctx, parent, osdmap);
|
||||
|
||||
dout(10) << *parent << " is new" << dendl;
|
||||
|
||||
// kick any waiters
|
||||
wake_pg_waiters(parent->info.pgid);
|
||||
|
||||
assert(service.splitting(info.pgid));
|
||||
peering_wait_for_split[info.pgid].push_back(evt);
|
||||
|
||||
//parent->queue_peering_event(evt);
|
||||
parent->queue_null(osdmap->get_epoch(), osdmap->get_epoch());
|
||||
parent->unlock();
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// already had it. did the mapping change?
|
||||
pg = _lookup_lock_pg(info.pgid);
|
||||
PG *pg = _lookup_lock_pg(info.pgid);
|
||||
if (epoch < pg->info.history.same_interval_since) {
|
||||
dout(10) << *pg << " get_or_create_pg acting changed in "
|
||||
<< pg->info.history.same_interval_since
|
||||
@ -2053,10 +2175,10 @@ void OSD::handle_pg_peering_evt(
|
||||
pg->unlock();
|
||||
return;
|
||||
}
|
||||
pg->queue_peering_event(evt);
|
||||
pg->unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
pg->queue_peering_event(evt);
|
||||
pg->unlock();
|
||||
}
|
||||
|
||||
|
||||
@ -5391,10 +5513,11 @@ void OSD::handle_pg_create(OpRequestRef op)
|
||||
if (can_create_pg(pgid)) {
|
||||
pg_interval_map_t pi;
|
||||
pg = _create_lock_pg(
|
||||
osdmap, pgid, true, false,
|
||||
osdmap, pgid, true, false, false,
|
||||
0, creating_pgs[pgid].acting, creating_pgs[pgid].acting,
|
||||
history, pi,
|
||||
*rctx.transaction);
|
||||
rctx.transaction->create_collection(coll_t(pgid));
|
||||
pg->info.last_epoch_started = pg->info.history.last_epoch_started;
|
||||
creating_pgs.erase(pgid);
|
||||
wake_pg_waiters(pg->info.pgid);
|
||||
|
@ -1049,10 +1049,21 @@ protected:
|
||||
PG *_open_lock_pg(OSDMapRef createmap,
|
||||
pg_t pg, bool no_lockdep_check=false,
|
||||
bool hold_map_lock=false);
|
||||
// Outcome of _try_resurrect_pg(): tells the caller which pg, if any, had
// its pending deletion halted so that it can be re-instantiated.
enum res_result {
|
||||
// deletion of an ancestor that splits into the requested pg was halted
RES_PARENT, // resurrected a parent
|
||||
// deletion of the requested pg itself was halted
RES_SELF, // resurrected self
|
||||
// no deleting pg was rescued (none found, or the cancel attempt failed)
RES_NONE // nothing relevant deleting
|
||||
};
|
||||
res_result _try_resurrect_pg(
|
||||
OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state);
|
||||
PG *_create_lock_pg(OSDMapRef createmap,
|
||||
pg_t pgid, bool newly_created,
|
||||
bool hold_map_lock, int role,
|
||||
vector<int>& up, vector<int>& acting,
|
||||
pg_t pgid,
|
||||
bool newly_created,
|
||||
bool hold_map_lock,
|
||||
bool backfill,
|
||||
int role,
|
||||
vector<int>& up,
|
||||
vector<int>& acting,
|
||||
pg_history_t history,
|
||||
pg_interval_map_t& pi,
|
||||
ObjectStore::Transaction& t);
|
||||
|
Loading…
Reference in New Issue
Block a user