osd/PG: do not go into replay state

Signed-off-by: Sage Weil <sage@redhat.com>
Sage Weil 2016-12-20 15:44:28 -05:00
parent 845f432dc1
commit 5aca4ebce6
5 changed files with 2 additions and 111 deletions

src/osd/OSD.cc

@@ -9331,7 +9331,7 @@ int OSD::init_op_flags(OpRequestRef& op)
        iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
     /* This a bit odd. PING isn't actually a write. It can't
      * result in an update to the object_info. PINGs also aren'ty
-     * replayed, so there's no reason to write out a log entry
+     * resent, so there's no reason to write out a log entry
      *
      * However, we pipeline them behind writes, so let's force
      * the write_ordered flag.
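The comment in this hunk carries the reasoning behind the one-word change: a WATCH/PING sub-op never updates object_info and is never resent, so it needs no log entry, yet it must still be ordered behind writes. Below is a minimal stand-alone sketch of that classification; the type names, constants, and the classify() helper are hypothetical stand-ins, not the actual OSD::init_op_flags() code.

// Illustrative sketch only (hypothetical names, not OSD::init_op_flags):
// a watch PING is not a logged write, but it is forced to be write-ordered
// so it stays pipelined behind earlier writes on the same object.
#include <cstdint>

constexpr uint16_t OP_WATCH      = 1;   // stand-in for CEPH_OSD_OP_WATCH
constexpr uint8_t  WATCH_OP_PING = 7;   // stand-in for CEPH_OSD_WATCH_OP_PING
constexpr uint32_t FLAG_WRITE_ORDERED = 1u << 1;

struct SubOp {
  uint16_t op;                   // top-level op code
  struct { uint8_t op; } watch;  // watch sub-op code
};

uint32_t classify(const SubOp& sub) {
  uint32_t flags = 0;
  if (sub.op == OP_WATCH && sub.watch.op == WATCH_OP_PING)
    flags |= FLAG_WRITE_ORDERED;  // ordered behind writes, but no log entry
  return flags;
}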

src/osd/OSD.h

@@ -973,7 +973,7 @@ public:
       }
     }
   }
-  // replay / delayed pg activation
+  // delayed pg activation
   void queue_for_recovery(PG *pg, bool front = false) {
     Mutex::Locker l(recovery_lock);
     if (front) {

src/osd/PG.cc

@@ -1551,24 +1551,6 @@ void PG::activate(ObjectStore::Transaction& t,
   assert(scrubber.callbacks.empty());
   assert(callbacks_for_degraded_object.empty());
-  // -- crash recovery?
-  if (acting.size() >= pool.info.min_size &&
-      is_primary() &&
-      pool.info.crash_replay_interval > 0 &&
-      may_need_replay(get_osdmap())) {
-    replay_until = ceph_clock_now();
-    replay_until += pool.info.crash_replay_interval;
-    dout(10) << "activate starting replay interval for " << pool.info.crash_replay_interval
-             << " until " << replay_until << dendl;
-    state_set(PG_STATE_REPLAY);
-    // TODOSAM: osd->osd-> is no good
-    osd->osd->replay_queue_lock.Lock();
-    osd->osd->replay_queue.push_back(pair<spg_t,utime_t>(
-        info.pgid, replay_until));
-    osd->osd->replay_queue_lock.Unlock();
-  }
   // twiddle pg state
   state_clear(PG_STATE_DOWN);
@@ -4857,86 +4839,6 @@ void PG::fulfill_log(
   osd->send_message_osd_cluster(mlog, con.get());
 }
-// true if all OSDs in prior intervals may have crashed, and we need to replay
-// false positives are okay, false negatives are not.
-bool PG::may_need_replay(const OSDMapRef osdmap) const
-{
-  bool crashed = false;
-  for (map<epoch_t,pg_interval_t>::const_reverse_iterator p = past_intervals.rbegin();
-       p != past_intervals.rend();
-       ++p) {
-    const pg_interval_t &interval = p->second;
-    dout(10) << "may_need_replay " << interval << dendl;
-    if (interval.last < info.history.last_epoch_started)
-      break;  // we don't care
-    if (interval.acting.empty())
-      continue;
-    if (!interval.maybe_went_rw)
-      continue;
-    // look at whether any of the osds during this interval survived
-    // past the end of the interval (i.e., didn't crash and
-    // potentially fail to COMMIT a write that it ACKed).
-    bool any_survived_interval = false;
-    // consider ACTING osds
-    for (unsigned i=0; i<interval.acting.size(); i++) {
-      int o = interval.acting[i];
-      if (o == CRUSH_ITEM_NONE)
-        continue;
-      const osd_info_t *pinfo = 0;
-      if (osdmap->exists(o))
-        pinfo = &osdmap->get_info(o);
-      // does this osd appear to have survived through the end of the
-      // interval?
-      if (pinfo) {
-        if (pinfo->up_from <= interval.first && pinfo->up_thru > interval.last) {
-          dout(10) << "may_need_replay osd." << o
-                   << " up_from " << pinfo->up_from << " up_thru " << pinfo->up_thru
-                   << " survived the interval" << dendl;
-          any_survived_interval = true;
-        }
-        else if (pinfo->up_from <= interval.first &&
-                 (std::find(acting.begin(), acting.end(), o) != acting.end() ||
-                  std::find(up.begin(), up.end(), o) != up.end())) {
-          dout(10) << "may_need_replay osd." << o
-                   << " up_from " << pinfo->up_from << " and is in acting|up,"
-                   << " assumed to have survived the interval" << dendl;
-          // (if it hasn't, we will rebuild PriorSet)
-          any_survived_interval = true;
-        }
-        else if (pinfo->up_from > interval.last &&
-                 pinfo->last_clean_begin <= interval.first &&
-                 pinfo->last_clean_end > interval.last) {
-          dout(10) << "may_need_replay prior osd." << o
-                   << " up_from " << pinfo->up_from
-                   << " and last clean interval ["
-                   << pinfo->last_clean_begin << "," << pinfo->last_clean_end
-                   << ") survived the interval" << dendl;
-          any_survived_interval = true;
-        }
-      }
-    }
-    if (!any_survived_interval) {
-      dout(3) << "may_need_replay no known survivors of interval "
-              << interval.first << "-" << interval.last
-              << ", may need replay" << dendl;
-      crashed = true;
-      break;
-    }
-  }
-  return crashed;
-}
 void PG::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap)
 {
   bool changed = false;
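For readers skimming the removal above, the rule that may_need_replay() encoded can be condensed as follows. This is an illustrative sketch with simplified, hypothetical types; it omits the last_epoch_started cutoff and the per-OSD survival heuristics that the removed function derived from the OSDMap.

#include <vector>

// Hypothetical, simplified view of one past interval: could writes have
// happened, and which acting OSDs are known to have survived past its end
// (i.e. could not have lost a write they ACKed but never committed)?
struct PastInterval {
  bool maybe_went_rw;
  std::vector<bool> acting_survived;
};

// Replay is needed if some interval that may have gone read/write has no
// acting OSD known to have survived it. False positives are acceptable;
// false negatives are not.
bool may_need_replay(const std::vector<PastInterval>& past_intervals) {
  for (const auto& interval : past_intervals) {
    if (!interval.maybe_went_rw)
      continue;                       // no writes could have been acked
    bool any_survived = false;
    for (bool survived : interval.acting_survived)
      any_survived = any_survived || survived;
    if (!any_survived)
      return true;                    // every acting OSD may have crashed
  }
  return false;
}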

src/osd/PG.h

@@ -521,8 +521,6 @@ public:
   set<hobject_t, hobject_t::BitwiseComparator> recovering_oids;
 #endif
-  utime_t replay_until;
 protected:
   int role;         // 0 = primary, 1 = replica, -1=none.
   unsigned state;   // PG_STATE_*
@@ -593,8 +591,6 @@ public:
   friend std::ostream& operator<<(std::ostream& oss,
                                   const struct PriorSet &prior);
-  bool may_need_replay(const OSDMapRef osdmap) const;
 public:
   struct BufferedRecoveryMessages {
@@ -2172,7 +2168,6 @@ public:
   bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
   bool is_peering() const { return state_test(PG_STATE_PEERING); }
   bool is_down() const { return state_test(PG_STATE_DOWN); }
-  bool is_replay() const { return state_test(PG_STATE_REPLAY); }
   bool is_clean() const { return state_test(PG_STATE_CLEAN); }
   bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
   bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }

src/osd/PrimaryLogPG.cc

@@ -1637,12 +1637,6 @@ void PrimaryLogPG::do_request(
     op->mark_delayed("waiting for active");
     return;
   }
-  if (is_replay()) {
-    dout(20) << " replay, waiting for active on " << op << dendl;
-    waiting_for_active.push_back(op);
-    op->mark_delayed("waiting for replay end");
-    return;
-  }
   // verify client features
   if ((pool.info.has_tiers() || pool.info.is_tier()) &&
       !op->has_feature(CEPH_FEATURE_OSD_CACHEPOOL)) {