diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 1b97618ee3c..a63ab28779c 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -928,7 +928,22 @@ void PG::clear_primary_state() */ map::const_iterator PG::find_best_info(const map &infos) const { - epoch_t last_epoch_started = 0; + eversion_t min_last_update_acceptable = eversion_t::max(); + epoch_t max_last_epoch_started_found = 0; + for (map::const_iterator i = infos.begin(); + i != infos.end(); + ++i) { + if (max_last_epoch_started_found < i->second.history.last_epoch_started) { + min_last_update_acceptable = eversion_t::max(); + max_last_epoch_started_found = i->second.history.last_epoch_started; + } + if (max_last_epoch_started_found == i->second.history.last_epoch_started) { + if (min_last_update_acceptable > i->second.last_update) + min_last_update_acceptable = i->second.last_update; + } + } + assert(min_last_update_acceptable != eversion_t::max()); + map::const_iterator best = infos.end(); // find osd with newest last_update. if there are multiples, prefer // - a longer tail, if it brings another peer into log contiguity @@ -936,15 +951,9 @@ map::const_iterator PG::find_best_info(const map for (map::const_iterator p = infos.begin(); p != infos.end(); ++p) { - // Only consider peers with the most recent last_epoch_started found - if (p->second.history.last_epoch_started > last_epoch_started) { - last_epoch_started = p->second.history.last_epoch_started; - if (best != infos.end() && - last_epoch_started > best->second.history.last_epoch_started) - best = infos.end(); - } else if (p->second.history.last_epoch_started < last_epoch_started) { + // Only consider peers with last_update >= min_last_update_acceptable + if (p->second.last_update < min_last_update_acceptable) continue; - } // Disquality anyone who is incomplete (not fully backfilled) if (p->second.is_incomplete()) continue; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 9cd79414a81..8d11aea2e30 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -417,6 +417,13 @@ public: eversion_t(bufferlist& bl) : __pad(0) { decode(bl); } + static eversion_t max() { + eversion_t max; + max.version -= 1; + max.epoch -= 1; + return max; + } + operator ceph_eversion() { ceph_eversion c; c.epoch = epoch;