From 8c8e1b7835e032de0bb5c11b6a14f64af950f7a5 Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Wed, 7 Oct 2015 04:34:34 +0000 Subject: [PATCH] pg: add auto-repair for EC pool Fixes: #12754 Signed-off-by: Guang Yang --- src/common/config_opts.h | 2 ++ src/osd/ECBackend.h | 1 + src/osd/PG.cc | 26 +++++++++++++++++++++++++- src/osd/PG.h | 5 +++++ src/osd/PGBackend.h | 1 + 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 282fc0ddc89..9c1a3c2f676 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -674,6 +674,8 @@ OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the sche OPTION(osd_scrub_chunk_min, OPT_INT, 5) OPTION(osd_scrub_chunk_max, OPT_INT, 25) OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops +OPTION(osd_scrub_auto_repair, OPT_BOOL, false) // whether to auto-repair inconsistencies upon deep-scrubbing +OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5) // only auto-repair when the number of errors does not exceed this threshold OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index a039b70c8a8..efb284e2d73 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -494,6 +494,7 @@ public: ObjectStore::Transaction *t); bool scrub_supported() { return true; } + bool auto_repair_supported() const { return true; } void be_deep_scrub( const hobject_t &obj, diff --git a/src/osd/PG.cc b/src/osd/PG.cc index e97439cd514..b72d2317305 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2039,7 +2039,7 @@ bool PG::queue_scrub() state_set(PG_STATE_DEEP_SCRUB); scrubber.must_deep_scrub = false; } - if (scrubber.must_repair) { + if (scrubber.must_repair || scrubber.auto_repair) { 
state_set(PG_STATE_REPAIR); scrubber.must_repair = false; } @@ -3180,6 +3180,21 @@ bool PG::sched_scrub() return false; } + if (cct->_conf->osd_scrub_auto_repair + && get_pgbackend()->auto_repair_supported() + && time_for_deep + // respect an explicit command from the user, and do not auto-repair then + && !scrubber.must_repair + && !scrubber.must_scrub + && !scrubber.must_deep_scrub) { + dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl; + scrubber.auto_repair = true; + } else { + // no auto-repair when any check above fails; this also happens when the user issues a scrub/repair command during + // the scheduling of the scrub/repair (e.g. request reservation) + scrubber.auto_repair = false; + } + bool ret = true; if (!scrubber.reserved) { assert(scrubber.reserved_peers.empty()); @@ -4205,6 +4220,13 @@ bool PG::scrub_process_inconsistent() void PG::scrub_finish() { bool repair = state_test(PG_STATE_REPAIR); + // if the repair request comes from auto-repair and scrubber.authoritative has more + // entries than osd_scrub_auto_repair_num_errors, cancel the auto-repair + if (repair && scrubber.auto_repair + && scrubber.authoritative.size() > cct->_conf->osd_scrub_auto_repair_num_errors) { + state_clear(PG_STATE_REPAIR); + repair = false; + } bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); const char *mode = (repair ? "repair": (deep_scrub ? 
"deep-scrub" : "scrub")); @@ -4940,6 +4962,8 @@ ostream& operator<<(ostream& out, const PG& pg) if (pg.scrubber.must_repair) out << " MUST_REPAIR"; + if (pg.scrubber.auto_repair) + out << " AUTO_REPAIR"; if (pg.scrubber.must_deep_scrub) out << " MUST_DEEP_SCRUB"; if (pg.scrubber.must_scrub) diff --git a/src/osd/PG.h b/src/osd/PG.h index 872c81e6fbf..00a172533b9 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1071,6 +1071,7 @@ public: active(false), queue_snap_trim(false), waiting_on(0), shallow_errors(0), deep_errors(0), fixed(0), must_scrub(false), must_deep_scrub(false), must_repair(false), + auto_repair(false), num_digest_updates_pending(0), state(INACTIVE), deep(false), @@ -1099,6 +1100,9 @@ public: // flags to indicate explicitly requested scrubs (by admin) bool must_scrub, must_deep_scrub, must_repair; + // whether this scrub should auto-repair the PG; decided in PG::sched_scrub() + bool auto_repair; + // Maps from objects with errors to missing/inconsistent peers map, hobject_t::BitwiseComparator> missing; map, hobject_t::BitwiseComparator> inconsistent; @@ -1187,6 +1191,7 @@ public: must_scrub = false; must_deep_scrub = false; must_repair = false; + auto_repair = false; state = PG::Scrubber::INACTIVE; start = hobject_t(); diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 52599942b43..415f95fc3a4 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -548,6 +548,7 @@ Context *on_complete, bool fast_read = false) = 0; virtual bool scrub_supported() { return false; } + virtual bool auto_repair_supported() const { return false; } void be_scan_list( ScrubMap &map, const vector &ls, bool deep, uint32_t seed, ThreadPool::TPHandle &handle);