mirror of
https://github.com/ceph/ceph
synced 2024-12-19 09:57:05 +00:00
osd/PG: do not go into replay state
Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
parent
845f432dc1
commit
5aca4ebce6
@ -9331,7 +9331,7 @@ int OSD::init_op_flags(OpRequestRef& op)
|
||||
iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
|
||||
/* This is a bit odd. PING isn't actually a write. It can't
|
||||
* result in an update to the object_info. PINGs also aren't
|
||||
* replayed, so there's no reason to write out a log entry
|
||||
* resent, so there's no reason to write out a log entry
|
||||
*
|
||||
* However, we pipeline them behind writes, so let's force
|
||||
* the write_ordered flag.
|
||||
|
@ -973,7 +973,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
// replay / delayed pg activation
|
||||
// delayed pg activation
|
||||
void queue_for_recovery(PG *pg, bool front = false) {
|
||||
Mutex::Locker l(recovery_lock);
|
||||
if (front) {
|
||||
|
@ -1551,24 +1551,6 @@ void PG::activate(ObjectStore::Transaction& t,
|
||||
assert(scrubber.callbacks.empty());
|
||||
assert(callbacks_for_degraded_object.empty());
|
||||
|
||||
// -- crash recovery?
|
||||
if (acting.size() >= pool.info.min_size &&
|
||||
is_primary() &&
|
||||
pool.info.crash_replay_interval > 0 &&
|
||||
may_need_replay(get_osdmap())) {
|
||||
replay_until = ceph_clock_now();
|
||||
replay_until += pool.info.crash_replay_interval;
|
||||
dout(10) << "activate starting replay interval for " << pool.info.crash_replay_interval
|
||||
<< " until " << replay_until << dendl;
|
||||
state_set(PG_STATE_REPLAY);
|
||||
|
||||
// TODOSAM: osd->osd-> is no good
|
||||
osd->osd->replay_queue_lock.Lock();
|
||||
osd->osd->replay_queue.push_back(pair<spg_t,utime_t>(
|
||||
info.pgid, replay_until));
|
||||
osd->osd->replay_queue_lock.Unlock();
|
||||
}
|
||||
|
||||
// twiddle pg state
|
||||
state_clear(PG_STATE_DOWN);
|
||||
|
||||
@ -4857,86 +4839,6 @@ void PG::fulfill_log(
|
||||
osd->send_message_osd_cluster(mlog, con.get());
|
||||
}
|
||||
|
||||
|
||||
// true if all OSDs in prior intervals may have crashed, and we need to replay
|
||||
// false positives are okay, false negatives are not.
|
||||
bool PG::may_need_replay(const OSDMapRef osdmap) const
|
||||
{
|
||||
bool crashed = false;
|
||||
|
||||
for (map<epoch_t,pg_interval_t>::const_reverse_iterator p = past_intervals.rbegin();
|
||||
p != past_intervals.rend();
|
||||
++p) {
|
||||
const pg_interval_t &interval = p->second;
|
||||
dout(10) << "may_need_replay " << interval << dendl;
|
||||
|
||||
if (interval.last < info.history.last_epoch_started)
|
||||
break; // we don't care
|
||||
|
||||
if (interval.acting.empty())
|
||||
continue;
|
||||
|
||||
if (!interval.maybe_went_rw)
|
||||
continue;
|
||||
|
||||
// look at whether any of the osds during this interval survived
|
||||
// past the end of the interval (i.e., didn't crash and
|
||||
// potentially fail to COMMIT a write that it ACKed).
|
||||
bool any_survived_interval = false;
|
||||
|
||||
// consider ACTING osds
|
||||
for (unsigned i=0; i<interval.acting.size(); i++) {
|
||||
int o = interval.acting[i];
|
||||
if (o == CRUSH_ITEM_NONE)
|
||||
continue;
|
||||
|
||||
const osd_info_t *pinfo = 0;
|
||||
if (osdmap->exists(o))
|
||||
pinfo = &osdmap->get_info(o);
|
||||
|
||||
// does this osd appear to have survived through the end of the
|
||||
// interval?
|
||||
if (pinfo) {
|
||||
if (pinfo->up_from <= interval.first && pinfo->up_thru > interval.last) {
|
||||
dout(10) << "may_need_replay osd." << o
|
||||
<< " up_from " << pinfo->up_from << " up_thru " << pinfo->up_thru
|
||||
<< " survived the interval" << dendl;
|
||||
any_survived_interval = true;
|
||||
}
|
||||
else if (pinfo->up_from <= interval.first &&
|
||||
(std::find(acting.begin(), acting.end(), o) != acting.end() ||
|
||||
std::find(up.begin(), up.end(), o) != up.end())) {
|
||||
dout(10) << "may_need_replay osd." << o
|
||||
<< " up_from " << pinfo->up_from << " and is in acting|up,"
|
||||
<< " assumed to have survived the interval" << dendl;
|
||||
// (if it hasn't, we will rebuild PriorSet)
|
||||
any_survived_interval = true;
|
||||
}
|
||||
else if (pinfo->up_from > interval.last &&
|
||||
pinfo->last_clean_begin <= interval.first &&
|
||||
pinfo->last_clean_end > interval.last) {
|
||||
dout(10) << "may_need_replay prior osd." << o
|
||||
<< " up_from " << pinfo->up_from
|
||||
<< " and last clean interval ["
|
||||
<< pinfo->last_clean_begin << "," << pinfo->last_clean_end
|
||||
<< ") survived the interval" << dendl;
|
||||
any_survived_interval = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!any_survived_interval) {
|
||||
dout(3) << "may_need_replay no known survivors of interval "
|
||||
<< interval.first << "-" << interval.last
|
||||
<< ", may need replay" << dendl;
|
||||
crashed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return crashed;
|
||||
}
|
||||
|
||||
void PG::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap)
|
||||
{
|
||||
bool changed = false;
|
||||
|
@ -521,8 +521,6 @@ public:
|
||||
set<hobject_t, hobject_t::BitwiseComparator> recovering_oids;
|
||||
#endif
|
||||
|
||||
utime_t replay_until;
|
||||
|
||||
protected:
|
||||
int role; // 0 = primary, 1 = replica, -1=none.
|
||||
unsigned state; // PG_STATE_*
|
||||
@ -593,8 +591,6 @@ public:
|
||||
friend std::ostream& operator<<(std::ostream& oss,
|
||||
const struct PriorSet &prior);
|
||||
|
||||
bool may_need_replay(const OSDMapRef osdmap) const;
|
||||
|
||||
|
||||
public:
|
||||
struct BufferedRecoveryMessages {
|
||||
@ -2172,7 +2168,6 @@ public:
|
||||
bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
|
||||
bool is_peering() const { return state_test(PG_STATE_PEERING); }
|
||||
bool is_down() const { return state_test(PG_STATE_DOWN); }
|
||||
bool is_replay() const { return state_test(PG_STATE_REPLAY); }
|
||||
bool is_clean() const { return state_test(PG_STATE_CLEAN); }
|
||||
bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
|
||||
bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
|
||||
|
@ -1637,12 +1637,6 @@ void PrimaryLogPG::do_request(
|
||||
op->mark_delayed("waiting for active");
|
||||
return;
|
||||
}
|
||||
if (is_replay()) {
|
||||
dout(20) << " replay, waiting for active on " << op << dendl;
|
||||
waiting_for_active.push_back(op);
|
||||
op->mark_delayed("waiting for replay end");
|
||||
return;
|
||||
}
|
||||
// verify client features
|
||||
if ((pool.info.has_tiers() || pool.info.is_tier()) &&
|
||||
!op->has_feature(CEPH_FEATURE_OSD_CACHEPOOL)) {
|
||||
|
Loading…
Reference in New Issue
Block a user