Merge pull request #18524 from liewegas/wip-backfill-rops

osd/PG: fix recovery op leak

Reviewed-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
Sage Weil 2017-10-26 17:48:25 -05:00 committed by GitHub
commit 3f6e0b6e80
2 changed files with 37 additions and 74 deletions

View File

@ -2209,7 +2209,6 @@ void PG::start_recovery_op(const hobject_t& soid)
assert(recovery_ops_active >= 0);
recovery_ops_active++;
#ifdef DEBUG_RECOVERY_OIDS
assert(recovering_oids.count(soid) == 0);
recovering_oids.insert(soid);
#endif
osd->start_recovery_op(this, soid);
@ -2226,7 +2225,7 @@ void PG::finish_recovery_op(const hobject_t& soid, bool dequeue)
recovery_ops_active--;
#ifdef DEBUG_RECOVERY_OIDS
assert(recovering_oids.count(soid));
recovering_oids.erase(soid);
recovering_oids.erase(recovering_oids.find(soid));
#endif
osd->finish_recovery_op(this, soid, dequeue);
@ -6413,16 +6412,11 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
pg->publish_stats_to_osd();
}
boost::statechart::result
PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
void PG::RecoveryState::Backfilling::cancel_backfill()
{
PG *pg = context< RecoveryMachine >().pg;
ldout(pg->cct, 10) << "defer backfill, retry delay " << c.delay << dendl;
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
pg->state_set(PG_STATE_BACKFILL_WAIT);
pg->state_clear(PG_STATE_BACKFILLING);
for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
it != pg->backfill_targets.end();
++it) {
@ -6439,8 +6433,20 @@ PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
}
}
pg->waiting_on_backfill.clear();
if (!pg->waiting_on_backfill.empty()) {
pg->waiting_on_backfill.clear();
pg->finish_recovery_op(hobject_t::get_max());
}
}
// Reaction to DeferBackfill while in the Backfilling state.
//
// Logs the requested retry delay, flips the PG state bits from
// BACKFILLING to BACKFILL_WAIT, hands the teardown of the in-progress
// backfill to cancel_backfill(), asks the PG to schedule a backfill retry
// after the delay carried by the event, and transitions to NotBackfilling.
boost::statechart::result
PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
{
  RecoveryMachine &machine = context< RecoveryMachine >();
  PG *pg = machine.pg;

  ldout(pg->cct, 10) << "defer backfill, retry delay " << c.delay << dendl;

  // Update the externally visible PG state before tearing anything down.
  pg->state_set(PG_STATE_BACKFILL_WAIT);
  pg->state_clear(PG_STATE_BACKFILLING);

  // Shared cleanup used by the Backfilling reactions.
  cancel_backfill();

  // Try again once the delay requested by the event has elapsed.
  pg->schedule_backfill_retry(c.delay);

  return transit<NotBackfilling>();
}
@ -6450,29 +6456,9 @@ PG::RecoveryState::Backfilling::react(const UnfoundBackfill &c)
{
PG *pg = context< RecoveryMachine >().pg;
ldout(pg->cct, 10) << "backfill has unfound, can't continue" << dendl;
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
pg->state_set(PG_STATE_BACKFILL_UNFOUND);
pg->state_clear(PG_STATE_BACKFILLING);
for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
it != pg->backfill_targets.end();
++it) {
assert(*it != pg->pg_whoami);
ConnectionRef con = pg->osd->get_con_osd_cluster(
it->osd, pg->get_osdmap()->get_epoch());
if (con) {
pg->osd->send_message_osd_cluster(
new MBackfillReserve(
MBackfillReserve::RELEASE,
spg_t(pg->info.pgid.pgid, it->shard),
pg->get_osdmap()->get_epoch()),
con.get());
}
}
pg->waiting_on_backfill.clear();
cancel_backfill();
return transit<NotBackfilling>();
}
@ -6480,28 +6466,9 @@ boost::statechart::result
PG::RecoveryState::Backfilling::react(const RemoteReservationRevokedTooFull &)
{
PG *pg = context< RecoveryMachine >().pg;
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
pg->state_set(PG_STATE_BACKFILL_TOOFULL);
for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
it != pg->backfill_targets.end();
++it) {
assert(*it != pg->pg_whoami);
ConnectionRef con = pg->osd->get_con_osd_cluster(
it->osd, pg->get_osdmap()->get_epoch());
if (con) {
pg->osd->send_message_osd_cluster(
new MBackfillReserve(
MBackfillReserve::RELEASE,
spg_t(pg->info.pgid.pgid, it->shard),
pg->get_osdmap()->get_epoch()),
con.get());
}
}
pg->waiting_on_backfill.clear();
pg->finish_recovery_op(hobject_t::get_max());
pg->state_clear(PG_STATE_BACKFILLING);
cancel_backfill();
pg->schedule_backfill_retry(pg->cct->_conf->osd_recovery_retry_interval);
return transit<NotBackfilling>();
}
@ -6510,27 +6477,8 @@ boost::statechart::result
PG::RecoveryState::Backfilling::react(const RemoteReservationRevoked &)
{
PG *pg = context< RecoveryMachine >().pg;
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
pg->state_set(PG_STATE_BACKFILL_WAIT);
for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
it != pg->backfill_targets.end();
++it) {
assert(*it != pg->pg_whoami);
ConnectionRef con = pg->osd->get_con_osd_cluster(
it->osd, pg->get_osdmap()->get_epoch());
if (con) {
pg->osd->send_message_osd_cluster(
new MBackfillReserve(
MBackfillReserve::RELEASE,
spg_t(pg->info.pgid.pgid, it->shard),
pg->get_osdmap()->get_epoch()),
con.get());
}
}
pg->waiting_on_backfill.clear();
cancel_backfill();
return transit<WaitLocalBackfillReserved>();
}
@ -6594,8 +6542,7 @@ void PG::RecoveryState::WaitRemoteBackfillReserved::exit()
pg->osd->recoverystate_perf->tinc(rs_waitremotebackfillreserved_latency, dur);
}
boost::statechart::result
PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationRejected &evt)
void PG::RecoveryState::WaitRemoteBackfillReserved::retry()
{
PG *pg = context< RecoveryMachine >().pg;
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
@ -6625,7 +6572,19 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationReje
pg->publish_stats_to_osd();
pg->schedule_backfill_retry(pg->cct->_conf->osd_recovery_retry_interval);
}
// Reaction to RemoteReservationRejected while waiting for remote backfill
// reservations.
//
// The event carries no data that is used here (hence the unnamed
// parameter): the handler runs the shared retry() helper and then
// transitions to NotBackfilling.
boost::statechart::result
PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationRejected &)
{
  // Shared with the RemoteReservationRevoked reaction.
  retry();

  return transit<NotBackfilling>();
}
// Reaction to RemoteReservationRevoked while waiting for remote backfill
// reservations.
//
// Handled exactly like a rejection: the event payload is not inspected
// (hence the unnamed parameter), the shared retry() helper is run, and the
// state machine transitions to NotBackfilling.
boost::statechart::result
PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationRevoked &)
{
  // Shared with the RemoteReservationRejected reaction.
  retry();

  return transit<NotBackfilling>();
}

View File

@ -840,7 +840,7 @@ protected:
int recovery_ops_active;
set<pg_shard_t> waiting_on_backfill;
#ifdef DEBUG_RECOVERY_OIDS
set<hobject_t> recovering_oids;
multiset<hobject_t> recovering_oids;
#endif
protected:
@ -2193,6 +2193,7 @@ protected:
boost::statechart::result react(const RemoteReservationRevoked& evt);
boost::statechart::result react(const DeferBackfill& evt);
boost::statechart::result react(const UnfoundBackfill& evt);
void cancel_backfill();
void exit();
};
@ -2200,13 +2201,16 @@ protected:
typedef boost::mpl::list<
boost::statechart::custom_reaction< RemoteBackfillReserved >,
boost::statechart::custom_reaction< RemoteReservationRejected >,
boost::statechart::custom_reaction< RemoteReservationRevoked >,
boost::statechart::transition< AllBackfillsReserved, Backfilling >
> reactions;
set<pg_shard_t>::const_iterator backfill_osd_it;
explicit WaitRemoteBackfillReserved(my_context ctx);
void retry();
void exit();
boost::statechart::result react(const RemoteBackfillReserved& evt);
boost::statechart::result react(const RemoteReservationRejected& evt);
boost::statechart::result react(const RemoteReservationRevoked& evt);
};
struct WaitLocalBackfillReserved : boost::statechart::state< WaitLocalBackfillReserved, Active >, NamedState {