osd: Check whether any OSD is full before starting recovery

Add event RecoveryTooFull to move to NotRecovering state

Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
David Zafman 2017-04-05 14:12:43 -07:00
parent 27e14504f6
commit 84088568b5
3 changed files with 23 additions and 1 deletions

View File

@ -871,6 +871,7 @@ OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false) // when true, WaitLocalRecoveryReserved skips its check_osdmap_full() test before requesting the local recovery reservation
OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false)
OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking

View File

@ -6700,6 +6700,15 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
{
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
// Make sure all nodes that are part of the recovery aren't full
if (!pg->cct->_conf->osd_debug_skip_full_check_in_recovery &&
pg->osd->check_osdmap_full(pg->actingbackfill)) {
post_event(RecoveryTooFull());
return;
}
pg->state_clear(PG_STATE_RECOVERY_TOOFULL);
pg->state_set(PG_STATE_RECOVERY_WAIT);
pg->osd->local_reserver.request_reservation(
pg->info.pgid,
@ -6710,6 +6719,15 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
pg->publish_stats_to_osd();
}
// RecoveryTooFull handler for WaitLocalRecoveryReserved: flags the PG with
// PG_STATE_RECOVERY_TOOFULL, asks the PG to schedule a recovery-full retry,
// and then leaves this state for NotRecovering.
boost::statechart::result
PG::RecoveryState::WaitLocalRecoveryReserved::react(const RecoveryTooFull &evt)
{
  PG *const owner = context< RecoveryMachine >().pg;

  owner->state_set(PG_STATE_RECOVERY_TOOFULL);
  owner->schedule_recovery_full_retry();

  return transit< NotRecovering >();
}
void PG::RecoveryState::WaitLocalRecoveryReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);

View File

@ -1506,6 +1506,7 @@ public:
TrivialEvent(RequestRecovery)
TrivialEvent(RecoveryDone)
TrivialEvent(BackfillTooFull)
TrivialEvent(RecoveryTooFull) // posted by WaitLocalRecoveryReserved when check_osdmap_full() reports a full OSD in actingbackfill
TrivialEvent(AllReplicasRecovered)
TrivialEvent(DoRecovery)
@ -1947,10 +1948,12 @@ public:
// Sub-state of Active whose constructor requests the local recovery
// reservation. Reactions:
//  - LocalRecoveryReserved -> transition to WaitRemoteRecoveryReserved
//  - RecoveryTooFull       -> custom reaction, see react() below
struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState {
typedef boost::mpl::list <
// NOTE(review): the next two lines are the same transition entry without and
// with a trailing comma; presumably the first is the pre-change line and only
// the comma-terminated one should remain -- confirm against the real header.
boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >
boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >,
boost::statechart::custom_reaction< RecoveryTooFull >
> reactions;
explicit WaitLocalRecoveryReserved(my_context ctx);
void exit();
// Handles RecoveryTooFull; the definition sets PG_STATE_RECOVERY_TOOFULL,
// schedules a retry and transits to NotRecovering.
boost::statechart::result react(const RecoveryTooFull &evt);
};
struct Activating : boost::statechart::state< Activating, Active >, NamedState {