Merge branch 'wip_choose_acting' into stable

This commit is contained in:
Sage Weil 2011-05-20 00:41:31 -07:00
commit 6995fd515c
5 changed files with 197 additions and 96 deletions

View File

@ -3935,8 +3935,14 @@ void OSD::do_infos(map<int,MOSDPGInfo*>& info_map)
{
for (map<int,MOSDPGInfo*>::iterator p = info_map.begin();
p != info_map.end();
++p)
++p) {
for (vector<PG::Info>::iterator i = p->second->pg_info.begin();
i != p->second->pg_info.end();
++i) {
dout(20) << "Sending info " << *i << " to osd" << p->first << dendl;
}
cluster_messenger->send_message(p->second, osdmap->get_cluster_inst(p->first));
}
info_map.clear();
}
@ -3973,6 +3979,12 @@ void OSD::handle_pg_notify(MOSDPGNotify *m)
if (!pg)
continue;
if (pg->old_peering_msg(m->get_epoch())) {
dout(10) << "ignoring old peering message " << *m << dendl;
pg->unlock();
continue;
}
PG::RecoveryCtx rctx(&query_map, &info_map, 0, &fin->contexts, t);
pg->handle_notify(from, *it, &rctx);
@ -4007,8 +4019,16 @@ void OSD::handle_pg_log(MOSDPGLog *m)
C_Contexts *fin;
PG *pg = get_or_create_pg(m->info, m->get_epoch(),
from, created, false, &t, &fin);
if (!pg)
if (!pg) {
m->put();
return;
}
if (pg->old_peering_msg(m->get_epoch())) {
dout(10) << "ignoring old peering message " << *m << dendl;
pg->unlock();
return;
}
map< int, map<pg_t,PG::Query> > query_map;
map< int, MOSDPGInfo* > info_map;
@ -4049,6 +4069,13 @@ void OSD::handle_pg_info(MOSDPGInfo *m)
PG::RecoveryCtx rctx(0, &info_map, 0, &fin->contexts, t);
if (!pg)
continue;
if (pg->old_peering_msg(m->get_epoch())) {
dout(10) << "ignoring old peering message " << *m << dendl;
pg->unlock();
continue;
}
pg->handle_info(from, *p, &rctx);
int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin);
@ -4177,7 +4204,16 @@ void OSD::handle_pg_query(MOSDPGQuery *m)
assert(role != 0);
dout(10) << " pg " << pgid << " dne" << dendl;
PG::Info empty(pgid);
notify_list[from].push_back(empty);
if (it->second.type == PG::Query::LOG ||
it->second.type == PG::Query::BACKLOG ||
it->second.type == PG::Query::FULLLOG) {
MOSDPGLog *mlog = new MOSDPGLog(osdmap->get_epoch(), empty);
_share_map_outgoing(osdmap->get_cluster_inst(from));
cluster_messenger->send_message(mlog,
osdmap->get_cluster_inst(from));
} else {
notify_list[from].push_back(empty);
}
continue;
}
@ -4190,6 +4226,12 @@ void OSD::handle_pg_query(MOSDPGQuery *m)
continue;
}
if (pg->old_peering_msg(m->get_epoch())) {
dout(10) << "ignoring old peering message " << *m << dendl;
pg->unlock();
continue;
}
if (pg->deleting) {
/*
* We cancel deletion on pg change. And the primary will never
@ -4285,6 +4327,8 @@ void OSD::_remove_pg(PG *pg)
// reset log, last_complete, in case deletion gets canceled
pg->info.last_complete = eversion_t();
pg->info.last_update = eversion_t();
pg->info.log_tail = eversion_t();
pg->log.zero();
pg->ondisklog.zero();

View File

@ -388,7 +388,10 @@ void PG::merge_log(ObjectStore::Transaction& t,
bool changed = false;
if (log.empty() ||
(olog.tail > log.head && olog.backlog)) { // e.g. log=(0,20] olog=(40,50]+backlog)
(olog.tail > log.head && olog.backlog) || // e.g. log=(0,20] olog=(40,50]+backlog)
(log.head <= olog.head &&
log.tail >= olog.tail &&
!log.backlog && olog.backlog)) { // olog is clearly superior in every way
if (is_primary()) {
// we should have our own backlog already; see peer() code where
@ -470,7 +473,8 @@ void PG::merge_log(ObjectStore::Transaction& t,
log.index(*to);
dout(15) << *to << dendl;
}
assert(to != olog.log.end());
assert(to != olog.log.end() ||
(olog.head == info.last_update));
// splice into our log.
log.log.splice(log.log.begin(),
@ -1031,7 +1035,7 @@ bool PG::prior_set_affected(PgPriorSet &prior, const OSDMap *osdmap) const
return false;
}
bool PG::adjust_need_up_thru(PgPriorSet &prior, const OSDMap *osdmap)
bool PG::adjust_need_up_thru(const OSDMap *osdmap)
{
epoch_t up_thru = osd->osdmap->get_up_thru(osd->whoami);
if (need_up_thru &&
@ -1166,7 +1170,6 @@ void PG::build_prior(std::auto_ptr<PgPriorSet> &prior_set)
assert(info.history.last_epoch_started >= it->second.history.last_epoch_started);
}
}
stringstream out;
prior_set.reset(new PgPriorSet(osd->whoami,
*osd->osdmap,
past_intervals,
@ -1174,14 +1177,8 @@ void PG::build_prior(std::auto_ptr<PgPriorSet> &prior_set)
acting,
info,
this));
dout(10) << out << dendl;
PgPriorSet &prior(*prior_set.get());
dout(10) << "build_prior: " << *this << " "
<< (prior.crashed ? " crashed":"")
<< (prior.pg_down ? " down":"")
<< (prior.some_down ? " some_down":"")
<< dendl;
// take note that we care about the primary's up_thru. if it
// changes later, it will affect our prior_set, and we'll want
// to rebuild it!
@ -1232,6 +1229,7 @@ void PG::clear_primary_state()
peer_last_complete_ondisk.clear();
peer_activated.clear();
min_last_complete_ondisk = eversion_t();
pg_trim_to = eversion_t();
stray_purged.clear();
might_have_unfound.clear();
@ -1249,37 +1247,48 @@ void PG::clear_primary_state()
osd->snap_trim_wq.dequeue(this);
}
bool PG::choose_acting(int newest_update_osd)
bool PG::choose_acting(int newest_update_osd) const
{
vector<int> want = up;
Info& newest = (newest_update_osd == osd->whoami) ? info : peer_info[newest_update_osd];
Info& oprimi = (want[0] == osd->whoami) ? info : peer_info[want[0]];
if (newest_update_osd != want[0] &&
oprimi.last_update < newest.log_tail && !newest.log_backlog) {
// up[0] needs a backlog to catch up
// make newest_update_osd primary instead?
for (unsigned i=1; i<want.size(); i++)
if (want[i] == newest_update_osd) {
dout(10) << "choose_acting up[0] osd" << want[0] << " needs backlog to catch up, making "
<< want[i] << " primary" << dendl;
want[0] = want[i];
want[i] = up[0];
break;
}
}
// exclude peers who need backlogs to catch up?
Info& primi = (want[0] == osd->whoami) ? info : peer_info[want[0]];
for (vector<int>::iterator p = want.begin() + 1; p != want.end(); ) {
Info& pi = (*p == osd->whoami) ? info : peer_info[*p];
if (pi.last_update < primi.log_tail && !primi.log_backlog) {
dout(10) << "choose_acting osd" << *p << " needs primary backlog to catch up" << dendl;
p = want.erase(p);
vector<int> want;
map<int, Info> all_info(peer_info.begin(), peer_info.end());
all_info[osd->whoami] = info;
const Info &best_info = all_info.find(newest_update_osd)->second;
dout(10) << "choose_acting best_info is " << best_info
<< " from osd" << newest_update_osd << dendl;
for (vector<int>::const_iterator i = up.begin();
i != up.end();
++i) {
const Info &cur_info = all_info.find(*i)->second;
if (best_info.log_tail <= cur_info.last_update || best_info.log_backlog) {
// Can be brought up to date without stopping to generate a backlog
want.push_back(*i);
dout(10) << " osd" << *i << " (up) accepted" << dendl;
} else {
dout(10) << "choose_acting osd" << *p << " can catch up with osd" << want[0] << " log" << dendl;
p++;
dout(10) << " osd" << *i << " (up) REJECTED" << dendl;
}
}
for (map<int, Info>::const_iterator i = all_info.begin();
i != all_info.end();
++i) {
if (want.size() >= osd->osdmap->get_pg_size(info.pgid))
break;
vector<int>::const_iterator up_it = find(up.begin(), up.end(), i->first);
if (up_it != up.end())
continue;
if (best_info.log_tail <= i->second.last_update || best_info.log_backlog) {
// Can be brought up to date without stopping to generate a backlog
want.push_back(i->first);
dout(10) << " osd" << i->first << " (stray) accepted" << dendl;
} else {
dout(10) << " osd" << i->first << " (stray) REJECTED" << dendl;
}
}
if (want != acting) {
dout(10) << "choose_acting want " << want << " != acting " << acting
<< ", requesting pg_temp change" << dendl;
@ -1294,37 +1303,52 @@ bool PG::choose_acting(int newest_update_osd)
return true;
}
void PG::choose_log_location(const PgPriorSet &prior_set,
bool PG::choose_log_location(const PgPriorSet &prior_set,
bool &need_backlog,
bool &wait_on_backlog,
int &pull_from,
eversion_t &newest_update,
eversion_t &oldest_update) const
{
// Find the osd with the most recent update
pull_from = -1;
const Info *best_info = NULL;
need_backlog = false;
wait_on_backlog = false;
const Info *best_info = &info;
for (map<int, Info>::const_iterator it = peer_info.begin();
it != peer_info.end();
++it) {
// Only consider osds in the prior set
if (prior_set.cur.find(it->first) == prior_set.cur.end()) {
map<int, Info> all_info(peer_info.begin(), peer_info.end());
all_info[osd->whoami] = info;
for (map<int, Info>::const_iterator i = all_info.begin();
i != all_info.end();
++i) {
if (prior_set.cur.find(i->first) == prior_set.cur.end()) {
dout(10) << "osd" << i->first << " not in current prior set, skipping" << dendl;
continue;
}
if (best_info->last_update < it->second.last_update) {
best_info = &(it->second);
pull_from = it->first;
// We always want the latest last update, but we prefer one that
// has a backlog. If last update and backlog status are the same,
// prefer the one with the longer log.
if (!best_info ||
i->second.last_update > best_info->last_update ||
((i->second.last_update == best_info->last_update) &&
(((i->second.log_tail < best_info->log_tail &&
(i->second.log_backlog || !best_info->log_backlog))) ||
(i->second.log_backlog && !best_info->log_backlog)))) {
best_info = &(i->second);
pull_from = i->first;
newest_update = i->second.last_update;
}
if (oldest_update > i->second.last_update) {
oldest_update = i->second.last_update;
}
}
newest_update = best_info->last_update;
if (pull_from >= 0)
dout(10) << "choose_log_location newest_update " << newest_update
<< " on osd" << pull_from << dendl;
else
dout(10) << "choose_log_location newest_update " << newest_update
<< " (local)" << dendl;
dout(10) << "choose_log_location newest_update " << newest_update
<< " on osd" << pull_from << dendl;
if (!choose_acting(pull_from)) {
return false;
}
for (vector<int>::const_iterator it = ++acting.begin();
it != acting.end();
@ -1336,15 +1360,11 @@ void PG::choose_log_location(const PgPriorSet &prior_set,
}
}
oldest_update = info.last_update;
for (vector<int>::const_iterator it = up.begin();
it != up.end();
++it) {
if (*it == osd->whoami) continue;
const Info &pi = peer_info.find(*it)->second;
if (oldest_update > pi.last_update) {
oldest_update = pi.last_update;
}
vector<int>::const_iterator acting_it = find(acting.begin(), acting.end(), *it);
if (acting_it != acting.end())
@ -1357,6 +1377,7 @@ void PG::choose_log_location(const PgPriorSet &prior_set,
need_backlog = true;
}
}
// check our own info -- we aren't in peer_info
if (best_info->log_tail > info.last_update) {
wait_on_backlog = true;
@ -1368,6 +1389,7 @@ void PG::choose_log_location(const PgPriorSet &prior_set,
dout(10) << "must generate backlog because my last_complete " << info.last_complete
<< " < log.tail " << info.log_tail << " and no backlog" << dendl;
need_backlog = true;
wait_on_backlog = true;
}
for (vector<int>::const_iterator it = ++acting.begin();
it != acting.end();
@ -1388,6 +1410,7 @@ void PG::choose_log_location(const PgPriorSet &prior_set,
<< (need_backlog ? " need_backlog" : "")
<< (wait_on_backlog ? " wait_on_backlog" : "")
<< dendl;
return true;
}
/* Build the might_have_unfound set.
@ -1582,7 +1605,7 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
// the replica's tail is after it's last_complete and it has no backlog.
// ick, this shouldn't normally happen. but we can compensate!
dout(10) << "activate peer osd" << peer << " has last_complete < log tail and no backlog, compensating" << dendl;
if (log.tail >= pi.last_complete) {
if (log.tail <= pi.last_complete) {
// _our_ log is sufficient, phew!
m->log.copy_after(log, pi.last_complete);
} else {
@ -3398,6 +3421,11 @@ bool PG::acting_up_affected(const vector<int>& newup, const vector<int>& newacti
}
}
bool PG::old_peering_msg(const epoch_t &msg_epoch)
{
return (last_warm_restart > msg_epoch);
}
/* Called before initializing peering during advance_map */
void PG::warm_restart(const OSDMap& lastmap, const vector<int>& newup, const vector<int>& newacting)
{
@ -3406,6 +3434,8 @@ void PG::warm_restart(const OSDMap& lastmap, const vector<int>& newup, const vec
// -- there was a change! --
kick();
last_warm_restart = osdmap.get_epoch();
vector<int> oldacting, oldup;
int oldrole = get_role();
int oldprimary = get_primary();
@ -3635,6 +3665,8 @@ void PG::Missing::add_next_event(Log::Entry& e)
//assert(missing[e.soid].need == e.prior_version);
rmissing.erase(missing[e.soid].need);
missing[e.soid].need = e.version; // leave .have unchanged.
} else if (e.is_backlog()) {
missing[e.soid].need = e.version;
} else {
// not missing, we must have prior_version (if any)
missing[e.soid] = item(e.version, e.prior_version);
@ -3653,6 +3685,8 @@ void PG::Missing::add_event(Log::Entry& e)
// missing older, revise need
rmissing.erase(missing[e.soid].need);
missing[e.soid].need = e.version;
} else if (e.is_backlog()) {
missing[e.soid].need = e.version;
} else
// not missing => have prior_version (if any)
missing[e.soid] = item(e.version, e.prior_version);
@ -3977,7 +4011,7 @@ boost::statechart::result PG::RecoveryState::Peering::react(const AdvMap& advmap
return transit< Reset >();
}
pg->adjust_need_up_thru(*prior_set.get(), &advmap.osdmap);
pg->adjust_need_up_thru(&advmap.osdmap);
return forward_event();
}
@ -4288,6 +4322,9 @@ void PG::RecoveryState::GetInfo::get_infos()
it != prior_set->cur.end();
++it) {
int peer = *it;
if (peer == pg->osd->whoami) {
continue;
}
if (pg->peer_info.count(peer)) {
dout(10) << " have osd" << peer << " info " << pg->peer_info[peer] << dendl;
continue;
@ -4348,21 +4385,19 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) :
eversion_t newest_update;
eversion_t oldest_update;
pg->choose_log_location(*context< Peering >().prior_set.get(),
need_backlog,
wait_on_backlog,
newest_update_osd,
newest_update,
oldest_update);
if (!pg->choose_acting(newest_update_osd == -1 ? pg->osd->whoami : newest_update_osd)) {
if (!pg->choose_log_location(*context< Peering >().prior_set.get(),
need_backlog,
wait_on_backlog,
newest_update_osd,
newest_update,
oldest_update)) {
post_event(NeedNewMap());
} else {
if (need_backlog && !pg->log.backlog) {
pg->osd->queue_generate_backlog(pg);
}
if (newest_update_osd != -1) {
if (newest_update_osd != pg->osd->whoami) {
dout(10) << " requesting master log from osd" << newest_update_osd << dendl;
context<RecoveryMachine>().send_query(newest_update_osd,
wait_on_backlog ?
@ -4374,7 +4409,7 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) :
wait_on_backlog = false;
}
if (!wait_on_backlog && newest_update_osd == -1) {
if (!wait_on_backlog && newest_update_osd == pg->osd->whoami) {
dout(10) << " neither backlog nor master log needed, "
<< "moving to GetMissing" << dendl;
post_event(GotLog());
@ -4385,7 +4420,11 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) :
boost::statechart::result
PG::RecoveryState::GetLog::react(const MLogRec& logevt) {
assert(!msg);
assert(logevt.from == newest_update_osd);
if (logevt.from != newest_update_osd) {
dout(10) << "GetLog: discarding log from "
<< "non-newest_update_osd osd" << logevt.from << dendl;
return discard_event();
}
dout(10) << "GetLog: recieved master log from osd"
<< logevt.from << dendl;
msg = logevt.msg;
@ -4402,7 +4441,8 @@ PG::RecoveryState::GetLog::react(const MLogRec& logevt) {
boost::statechart::result
PG::RecoveryState::GetLog::react(const BacklogComplete&) {
wait_on_backlog = false;
if (msg || newest_update_osd == -1) {
PG *pg = context< RecoveryMachine >().pg;
if (msg || newest_update_osd == pg->osd->whoami) {
dout(10) << "GetLog: already have/don't need master log."
<< "moving on to GetMissing" << dendl;
post_event(GotLog());
@ -4576,7 +4616,7 @@ void PG::RecoveryState::RecoveryMachine::log_exit(const char *state_name, utime_
}
/*----RecoverState Methods-----*/
/*----RecoveryState Methods-----*/
#undef dout_prefix
#define dout_prefix *_dout << machine.pg->gen_prefix()
@ -4715,11 +4755,9 @@ PG::PgPriorSet::PgPriorSet(int whoami,
*/
// current up and/or acting nodes, of course.
for (unsigned i=0; i<up.size(); i++)
if (up[i] != whoami)
cur.insert(up[i]);
cur.insert(up[i]);
for (unsigned i=0; i<acting.size(); i++)
if (acting[i] != whoami)
cur.insert(acting[i]);
cur.insert(acting[i]);
// see if i have ever started since joining the pg. this is important only
// if we want to exclude lost osds.
@ -4763,10 +4801,8 @@ PG::PgPriorSet::PgPriorSet(int whoami,
for (unsigned i=0; i<interval.up.size(); i++) {
int o = interval.up[i];
if (osdmap.is_up(o)) { // is up now
if (o != whoami) // and is not me
cur.insert(o);
}
if (osdmap.is_up(o)) // is up now
cur.insert(o);
}
// consider ACTING osds
@ -4790,6 +4826,9 @@ PG::PgPriorSet::PgPriorSet(int whoami,
}
if (osdmap.is_up(o)) { // is up now
// include past acting osds if they are up
cur.insert(o);
// did any osds survive _this_ interval?
any_survived = true;
} else if (!pinfo || pinfo->lost_at > interval.first) {
@ -4857,5 +4896,12 @@ PG::PgPriorSet::PgPriorSet(int whoami,
}
}
dout(10) << "build_prior final: cur " << cur << " down " << down << " lost " << lost
<< " up_thru " << up_thru
<< " inter_up_thru " << inter_up_thru
<< (crashed ? " crashed":"")
<< (pg_down ? " pg_down":"")
<< (some_down ? " some_down":"")
<< dendl;
}

View File

@ -1277,6 +1277,9 @@ protected:
// primary-only, recovery-only state
set<int> might_have_unfound; // These osds might have objects on them
// which are unfound on the primary
epoch_t last_warm_restart;
friend class OSD;
@ -1326,7 +1329,7 @@ public:
void clear_prior();
bool prior_set_affected(PgPriorSet &prior, const OSDMap *osdmap) const;
bool adjust_need_up_thru(PgPriorSet &prior, const OSDMap *osdmap);
bool adjust_need_up_thru(const OSDMap *osdmap);
bool all_unfound_are_lost(const OSDMap* osdmap) const;
void mark_obj_as_lost(ObjectStore::Transaction& t,
@ -1371,14 +1374,14 @@ public:
void trim_write_ahead();
bool choose_acting(int newest_update_osd);
bool choose_acting(int newest_update_osd) const;
bool recover_master_log(map< int, map<pg_t,Query> >& query_map,
eversion_t &oldest_update);
eversion_t calc_oldest_known_update() const;
void do_peer(ObjectStore::Transaction& t, list<Context*>& tfin,
map< int, map<pg_t,Query> >& query_map,
map<int, MOSDPGInfo*> *activator_map=0);
void choose_log_location(const PgPriorSet &prior_set,
bool choose_log_location(const PgPriorSet &prior_set,
bool &need_backlog,
bool &wait_on_backlog,
int &pull_from,
@ -1481,6 +1484,7 @@ public:
have_master_log(true),
recovery_state(this),
need_up_thru(false),
last_warm_restart(0),
pg_stats_lock("PG::pg_stats_lock"),
pg_stats_valid(false),
finish_sync_event(NULL),
@ -1563,7 +1567,8 @@ public:
pair<int, Info> &notify_info);
void fulfill_log(int from, const Query &query);
bool acting_up_affected(const vector<int>& newup, const vector<int>& newacting);
bool old_peering_msg(const epoch_t &msg_epoch);
// recovery bits
void handle_notify(int from, PG::Info& i, RecoveryCtx *rctx) {
recovery_state.handle_notify(from, i, rctx);

View File

@ -512,7 +512,7 @@ void ReplicatedPG::do_op(MOSDOp *op)
obc->ondisk_read_unlock();
}
if (result == -EAGAIN) // must have referenced non-existent class
if (result == -EAGAIN)
return;
// prepare the reply
@ -1417,7 +1417,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops,
break;
case CEPH_OSD_OP_ROLLBACK :
_rollback_to(ctx, op);
result = _rollback_to(ctx, op);
break;
case CEPH_OSD_OP_ZERO:
@ -1908,7 +1908,7 @@ inline void ReplicatedPG::_delete_head(OpContext *ctx)
info.stats.num_wr++;
}
void ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
{
SnapSetContext *ssc = ctx->obc->ssc;
ObjectState& obs = ctx->new_obs;
@ -1916,11 +1916,12 @@ void ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
const sobject_t& soid = oi.soid;
ObjectStore::Transaction& t = ctx->op_t;
snapid_t snapid = (uint64_t)op.snap.snapid;
snapid_t cloneid = 0;
dout(10) << "_rollback_to " << soid << " snapid " << snapid << dendl;
ObjectContext *rollback_to;
int ret = find_object_context(soid.oid, oi.oloc, snapid, &rollback_to, false);
int ret = find_object_context(soid.oid, oi.oloc, snapid, &rollback_to, false, &cloneid);
if (ret) {
if (-ENOENT == ret) {
// there's no snapshot here, or there's no object.
@ -1932,7 +1933,11 @@ void ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
/* a different problem, like degraded pool
* with not-yet-restored object. We shouldn't have been able
* to get here; recovery should have completed first! */
assert(0);
sobject_t rollback_target(soid.oid, cloneid);
assert(is_missing_object(rollback_target));
dout(20) << "_rollback_to attempted to roll back to a missing object "
<< rollback_target << " (requested snapid: ) " << snapid << dendl;
wait_for_missing_object(rollback_target, ctx->op);
} else {
// ummm....huh? It *can't* return anything else at time of writing.
assert(0);
@ -1980,6 +1985,7 @@ void ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
ssc->snapset.clone_overlap[*ssc->snapset.clones.rbegin()] = overlaps;
}
}
return ret;
}
void ReplicatedPG::_make_clone(ObjectStore::Transaction& t,

View File

@ -673,7 +673,7 @@ public:
void do_osd_op_effects(OpContext *ctx);
private:
void _delete_head(OpContext *ctx);
void _rollback_to(OpContext *ctx, ceph_osd_op& op);
int _rollback_to(OpContext *ctx, ceph_osd_op& op);
public:
bool same_for_read_since(epoch_t e);
bool same_for_modify_since(epoch_t e);