From 84088568b5bf4fb3fa48ebf3e157d288f9f29eed Mon Sep 17 00:00:00 2001 From: David Zafman Date: Wed, 5 Apr 2017 14:12:43 -0700 Subject: [PATCH] osd: Check whether any OSD is full before starting recovery Add event RecoveryTooFull to move to NotRecovering state Signed-off-by: David Zafman --- src/common/config_opts.h | 1 + src/osd/PG.cc | 18 ++++++++++++++++++ src/osd/PG.h | 5 ++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index c1630a44c01..c742ae6fa20 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -871,6 +871,7 @@ OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false) OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0) OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion OPTION(osd_debug_misdirected_ops, OPT_BOOL, false) +OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false) OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false) OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false) OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 576ec836dc5..c5a35dbaa4a 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -6700,6 +6700,15 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte { context< RecoveryMachine >().log_enter(state_name); PG *pg = context< RecoveryMachine >().pg; + + // Make sure all nodes that are part of the recovery aren't full + if (!pg->cct->_conf->osd_debug_skip_full_check_in_recovery && + pg->osd->check_osdmap_full(pg->actingbackfill)) { + post_event(RecoveryTooFull()); + return; + } + + pg->state_clear(PG_STATE_RECOVERY_TOOFULL); pg->state_set(PG_STATE_RECOVERY_WAIT); pg->osd->local_reserver.request_reservation( pg->info.pgid, @@ -6710,6 +6719,15 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte pg->publish_stats_to_osd(); } 
+boost::statechart::result +PG::RecoveryState::WaitLocalRecoveryReserved::react(const RecoveryTooFull &evt) +{ + PG *pg = context< RecoveryMachine >().pg; + pg->state_set(PG_STATE_RECOVERY_TOOFULL); + pg->schedule_recovery_full_retry(); + return transit< NotRecovering >(); +} + void PG::RecoveryState::WaitLocalRecoveryReserved::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); diff --git a/src/osd/PG.h b/src/osd/PG.h index 6ac48fd6cbf..b2168b3ee64 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1506,6 +1506,7 @@ public: TrivialEvent(RequestRecovery) TrivialEvent(RecoveryDone) TrivialEvent(BackfillTooFull) + TrivialEvent(RecoveryTooFull) TrivialEvent(AllReplicasRecovered) TrivialEvent(DoRecovery) @@ -1947,10 +1948,12 @@ public: struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState { typedef boost::mpl::list < - boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved > + boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >, + boost::statechart::custom_reaction< RecoveryTooFull > > reactions; explicit WaitLocalRecoveryReserved(my_context ctx); void exit(); + boost::statechart::result react(const RecoveryTooFull &evt); }; struct Activating : boost::statechart::state< Activating, Active >, NamedState {