osdc/Objecter: improve pool deletion detection

Currently we can have a race like so:

 - send op in epoch X
 - osd replies
 - pool deleted in epoch X+1
 - client gets X+1, sends map epoch check
 - client gets reply
   -> fails assert that there is no map check in flight

Avoid this situation by inferring that the pool is deleted when we see
that we previously sent the request but the pool is no longer present.
Since pool ids are not reused there is no point in doing a synchronous
map check at all.

Backport: giant
Fixes: #10372
Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2014-12-23 15:59:19 -08:00
parent a540ac3385
commit d7fd6fccc9

View File

@ -1271,16 +1271,25 @@ void Objecter::_check_op_pool_dne(Op *op, bool session_locked)
{
assert(rwlock.is_wlocked());
ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
<< " current " << osdmap->get_epoch()
<< " map_dne_bound " << op->map_dne_bound
<< dendl;
if (op->attempts) {
// we send a reply earlier, which means that previously the pool
// existed, and now it does not (i.e., it was deleted).
op->map_dne_bound = osdmap->get_epoch();
ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
<< " pool previously exists but now does not"
<< dendl;
} else {
ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
<< " current " << osdmap->get_epoch()
<< " map_dne_bound " << op->map_dne_bound
<< dendl;
}
if (op->map_dne_bound > 0) {
if (osdmap->get_epoch() >= op->map_dne_bound) {
// we had a new enough map
ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
<< " concluding pool " << op->target.base_pgid.pool() << " dne"
<< dendl;
<< " concluding pool " << op->target.base_pgid.pool()
<< " dne" << dendl;
if (op->onack) {
op->onack->complete(-ENOENT);
}
@ -1367,10 +1376,17 @@ void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister)
*need_unregister = false;
ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id
<< " current " << osdmap->get_epoch()
<< " map_dne_bound " << op->map_dne_bound
<< dendl;
if (op->register_gen > 0) {
ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id
<< " pool previously existed but now does not"
<< dendl;
op->map_dne_bound = osdmap->get_epoch();
} else {
ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id
<< " current " << osdmap->get_epoch()
<< " map_dne_bound " << op->map_dne_bound
<< dendl;
}
if (op->map_dne_bound > 0) {
if (osdmap->get_epoch() >= op->map_dne_bound) {
if (op->on_reg_ack) {