osd/: refactor past_intervals logic into PastIntervals

Signed-off-by: Samuel Just <sjust@redhat.com>
Samuel Just 2017-01-05 10:44:30 -08:00 committed by Sage Weil
parent e4c7ecea4a
commit 065bb89ca6
16 changed files with 765 additions and 541 deletions
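At a high level, the refactor takes the bare map<epoch_t, pg_interval_t> that callers previously passed around as pg_interval_map_t and hides it behind a new PastIntervals class: the message types, PG, and OSD now carry a PastIntervals and go through its interface (empty(), size(), get_bounds(), trim(), get_prior_set(), the static is_new_interval()/check_new_interval() helpers) instead of iterating the map directly. A minimal, self-contained sketch of that shape, using simplified stand-in types rather than the real Ceph headers:

// Simplified sketch only: epoch_t and pg_interval_t here stand in for the
// real Ceph definitions; the point is the containment and the interface.
#include <cassert>
#include <cstddef>
#include <map>
#include <utility>

using epoch_t = unsigned;

struct pg_interval_t {                 // trimmed stand-in for the nested struct
  epoch_t first = 0, last = 0;
  bool maybe_went_rw = false;
};

class PastIntervals {
  std::map<epoch_t, pg_interval_t> past_intervals;  // previously exposed as pg_interval_map_t
public:
  void add(const pg_interval_t &i) { past_intervals[i.first] = i; }  // for this demo only
  bool empty() const { return past_intervals.empty(); }
  std::size_t size() const { return past_intervals.size(); }

  // [(start_interval_start, start_interval_end), end), as documented on get_bounds()
  std::pair<std::pair<epoch_t, epoch_t>, epoch_t> get_bounds() const {
    assert(!past_intervals.empty());
    const pg_interval_t &oldest = past_intervals.begin()->second;
    const pg_interval_t &newest = past_intervals.rbegin()->second;
    return std::make_pair(std::make_pair(oldest.first, oldest.last + 1),
                          newest.last + 1);
  }

  // mirrors the trim() template added to osd_types.h: drop intervals that
  // ended before 'bound', invoking f on each interval dropped
  template <typename F>
  void trim(epoch_t bound, F &&f) {
    for (auto i = past_intervals.begin(); i != past_intervals.end();) {
      if (i->second.last >= bound)
        return;
      f(i->second);
      i = past_intervals.erase(i);
    }
  }
};

int main() {
  PastIntervals pi;
  pi.add({5, 9, true});
  pi.add({10, 14, false});
  assert(pi.get_bounds() == std::make_pair(std::make_pair(5u, 10u), 15u));
  pi.trim(12, [](const pg_interval_t &) { /* e.g. mark dirty_big_info */ });
  assert(pi.size() == 1);               // only [10,14] remains
  return 0;
}

The real class, added to src/osd/osd_types.h below, additionally carries encode/decode/dump and the nested PriorSet used during peering.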

View File: src/messages/MOSDPGInfo.h

@ -26,7 +26,7 @@ class MOSDPGInfo : public Message {
epoch_t epoch;
public:
vector<pair<pg_notify_t,pg_interval_map_t> > pg_list;
vector<pair<pg_notify_t,PastIntervals> > pg_list;
epoch_t get_epoch() const { return epoch; }
@ -47,7 +47,7 @@ public:
void print(ostream& out) const override {
out << "pg_info(" << pg_list.size() << " pgs e" << epoch << ":";
for (vector<pair<pg_notify_t,pg_interval_map_t> >::const_iterator i = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::const_iterator i = pg_list.begin();
i != pg_list.end();
++i) {
if (i != pg_list.begin())
@ -66,26 +66,26 @@ public:
// v1 was vector<pg_info_t>
__u32 n = pg_list.size();
::encode(n, payload);
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator p = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
p++)
::encode(p->first.info, payload);
// v2 needs the pg_interval_map_t for each record
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator p = pg_list.begin();
// v2 needs the PastIntervals for each record
for (vector<pair<pg_notify_t,PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
p++)
::encode(p->second, payload);
// v3 needs epoch_sent, query_epoch
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator p = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
p++)
::encode(pair<epoch_t, epoch_t>(
p->first.epoch_sent, p->first.query_epoch), payload);
// v4 needs from, to
for (vector<pair<pg_notify_t, pg_interval_map_t> >::iterator p = pg_list.begin();
for (vector<pair<pg_notify_t, PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
++p) {
::encode(p->first.from, payload);
@ -105,14 +105,14 @@ public:
}
if (header.version >= 2) {
// get the pg_interval_map_t portion
// get the PastIntervals portion
for (unsigned i=0; i<n; i++) {
::decode(pg_list[i].second, p);
}
}
// v3 needs epoch_sent, query_epoch
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator i = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator i = pg_list.begin();
i != pg_list.end();
i++) {
if (header.version >= 3) {
@ -128,7 +128,7 @@ public:
// v4 needs from and to
if (header.version >= 4) {
for (vector<pair<pg_notify_t, pg_interval_map_t> >::iterator i = pg_list.begin();
for (vector<pair<pg_notify_t, PastIntervals> >::iterator i = pg_list.begin();
i != pg_list.end();
i++) {
::decode(i->first.from, p);
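A note on the message changes here (and the analogous ones in MOSDPGLog and MOSDPGNotify below): PastIntervals::encode()/decode(), added later in this commit, simply forward to the wrapped map, so the bytes on the wire stay exactly what they were when these fields were a bare pg_interval_map_t (which, per osd_types.h below, was just a typedef for that map); the version-gated encode/decode logic above can therefore stay as it is. A toy illustration of that delegation property, using plain std types in place of Ceph's bufferlist encoding:

// Toy types only: 'blob' and these encode() overloads stand in for Ceph's
// bufferlist encoding; the real PastIntervals::encode is in osd_types.cc below.
#include <cassert>
#include <cstdint>
#include <map>
#include <vector>

using blob = std::vector<uint8_t>;

static void encode(uint32_t v, blob &out) {
  for (int i = 0; i < 4; ++i)
    out.push_back(static_cast<uint8_t>(v >> (8 * i)));
}

static void encode(const std::map<uint32_t, uint32_t> &m, blob &out) {
  encode(static_cast<uint32_t>(m.size()), out);
  for (const auto &p : m) {
    encode(p.first, out);
    encode(p.second, out);
  }
}

struct Wrapper {                        // plays the role of PastIntervals
  std::map<uint32_t, uint32_t> inner;   // plays the role of the private map
  void encode(blob &out) const { ::encode(inner, out); }  // same delegation
};

int main() {
  std::map<uint32_t, uint32_t> bare{{5, 9}, {10, 14}};
  Wrapper wrapped{bare};
  blob a, b;
  encode(bare, a);
  wrapped.encode(b);
  assert(a == b);   // wrapping the map does not change the encoded bytes
  return 0;
}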

View File: src/messages/MOSDPGLog.h

@ -36,7 +36,7 @@ public:
pg_info_t info;
pg_log_t log;
pg_missing_t missing;
pg_interval_map_t past_intervals;
PastIntervals past_intervals;
epoch_t get_epoch() const { return epoch; }
spg_t get_pgid() const { return spg_t(info.pgid.pgid, to); }

View File: src/messages/MOSDPGNotify.h

@ -33,11 +33,11 @@ class MOSDPGNotify : public Message {
/// the current epoch if this is not being sent in response to a
/// query. This allows the recipient to disregard responses to old
/// queries.
vector<pair<pg_notify_t,pg_interval_map_t> > pg_list; // pgid -> version
vector<pair<pg_notify_t,PastIntervals> > pg_list; // pgid -> version
public:
version_t get_epoch() const { return epoch; }
const vector<pair<pg_notify_t,pg_interval_map_t> >& get_pg_list() const {
const vector<pair<pg_notify_t,PastIntervals> >& get_pg_list() const {
return pg_list;
}
@ -45,7 +45,7 @@ class MOSDPGNotify : public Message {
: Message(MSG_OSD_PG_NOTIFY, HEAD_VERSION, COMPAT_VERSION) {
set_priority(CEPH_MSG_PRIO_HIGH);
}
MOSDPGNotify(epoch_t e, vector<pair<pg_notify_t,pg_interval_map_t> >& l)
MOSDPGNotify(epoch_t e, vector<pair<pg_notify_t,PastIntervals> >& l)
: Message(MSG_OSD_PG_NOTIFY, HEAD_VERSION, COMPAT_VERSION),
epoch(e) {
pg_list.swap(l);
@ -68,21 +68,21 @@ public:
// v2 was vector<pg_info_t>
__u32 n = pg_list.size();
::encode(n, payload);
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator p = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
p++)
::encode(p->first.info, payload);
::encode(query_epoch, payload);
// v3 needs the pg_interval_map_t for each record
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator p = pg_list.begin();
// v3 needs the PastIntervals for each record
for (vector<pair<pg_notify_t,PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
p++)
::encode(p->second, payload);
// v4 needs epoch_sent, query_epoch
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator p = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
p++)
::encode(pair<epoch_t, epoch_t>(
@ -90,7 +90,7 @@ public:
payload);
// v5 needs from, to
for (vector<pair<pg_notify_t, pg_interval_map_t> >::iterator p = pg_list.begin();
for (vector<pair<pg_notify_t, PastIntervals> >::iterator p = pg_list.begin();
p != pg_list.end();
++p) {
::encode(p->first.from, payload);
@ -113,14 +113,14 @@ public:
::decode(query_epoch, p);
if (header.version >= 3) {
// get the pg_interval_map_t portion
// get the PastIntervals portion
for (unsigned i=0; i<n; i++) {
::decode(pg_list[i].second, p);
}
}
// v3 needs epoch_sent, query_epoch
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator i = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator i = pg_list.begin();
i != pg_list.end();
i++) {
if (header.version >= 4) {
@ -136,7 +136,7 @@ public:
// v5 needs from and to
if (header.version >= 5) {
for (vector<pair<pg_notify_t, pg_interval_map_t> >::iterator i = pg_list.begin();
for (vector<pair<pg_notify_t, PastIntervals> >::iterator i = pg_list.begin();
i != pg_list.end();
i++) {
::decode(i->first.from, p);
@ -146,7 +146,7 @@ public:
}
void print(ostream& out) const override {
out << "pg_notify(";
for (vector<pair<pg_notify_t,pg_interval_map_t> >::const_iterator i = pg_list.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::const_iterator i = pg_list.begin();
i != pg_list.end();
++i) {
if (i != pg_list.begin())

View File: src/osd/OSD.cc

@ -3499,7 +3499,7 @@ PG *OSD::_create_lock_pg(
vector<int>& up, int up_primary,
vector<int>& acting, int acting_primary,
pg_history_t history,
const pg_interval_map_t& pi,
const PastIntervals& pi,
ObjectStore::Transaction& t)
{
assert(osd_lock.is_locked());
@ -3720,23 +3720,36 @@ void OSD::build_past_intervals_parallel()
++i) {
PG *pg = i->second;
epoch_t start, end;
if (!pg->_calc_past_interval_range(&start, &end, superblock.oldest_map)) {
if (pg->info.history.same_interval_since == 0)
pg->info.history.same_interval_since = end;
continue;
auto rpib = pg->get_required_past_interval_bounds(
pg->info,
superblock.oldest_map);
if (rpib.first == rpib.second && pg->past_intervals.empty()) {
if (pg->info.history.same_interval_since == 0) {
pg->info.history.same_interval_since = rpib.second;
}
continue;
} else {
auto apib = pg->past_intervals.get_bounds();
if (rpib.second == apib.second &&
apib.first.first <= rpib.first) {
if (pg->info.history.same_interval_since == 0) {
pg->info.history.same_interval_since = rpib.second;
}
continue;
}
}
dout(10) << pg->info.pgid << " needs " << start << "-" << end << dendl;
dout(10) << pg->info.pgid << " needs " << rpib.first << "-"
<< rpib.second << dendl;
pistate& p = pis[pg];
p.start = start;
p.end = end;
p.start = rpib.first;
p.end = rpib.second;
p.same_interval_since = 0;
if (start < cur_epoch)
cur_epoch = start;
if (end > end_epoch)
end_epoch = end;
if (rpib.first < cur_epoch)
cur_epoch = rpib.first;
if (rpib.second > end_epoch)
end_epoch = rpib.second;
}
}
if (pis.empty()) {
@ -3785,7 +3798,7 @@ void OSD::build_past_intervals_parallel()
boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
pg->get_is_recoverable_predicate());
std::stringstream debug;
bool new_interval = pg_interval_t::check_new_interval(
bool new_interval = PastIntervals::check_new_interval(
p.primary,
primary,
p.old_acting, acting,
@ -3859,7 +3872,7 @@ void OSD::build_past_intervals_parallel()
int OSD::handle_pg_peering_evt(
spg_t pgid,
const pg_history_t& orig_history,
const pg_interval_map_t& pi,
const PastIntervals& pi,
epoch_t epoch,
PG::CephPeeringEvtRef evt)
{
@ -3948,7 +3961,7 @@ int OSD::handle_pg_peering_evt(
vector<int> old_acting = old_pg_state->acting;
int old_primary = old_pg_state->primary.osd;
pg_history_t old_history = old_pg_state->info.history;
pg_interval_map_t old_past_intervals = old_pg_state->past_intervals;
PastIntervals old_past_intervals = old_pg_state->past_intervals;
old_pg_state->unlock();
pg = _create_lock_pg(
old_osd_map,
@ -3984,7 +3997,7 @@ int OSD::handle_pg_peering_evt(
vector<int> old_acting = old_pg_state->acting;
int old_primary = old_pg_state->primary.osd;
pg_history_t old_history = old_pg_state->info.history;
pg_interval_map_t old_past_intervals = old_pg_state->past_intervals;
PastIntervals old_past_intervals = old_pg_state->past_intervals;
old_pg_state->unlock();
PG *parent = _create_lock_pg(
old_osd_map,
@ -7941,7 +7954,7 @@ void OSD::handle_pg_create(OpRequestRef op)
bool mapped = osdmap->get_primary_shard(on, &pgid);
assert(mapped);
pg_interval_map_t pi;
PastIntervals pi;
pg_history_t history;
history.epoch_created = created;
history.last_scrub_stamp = ci->second;
@ -7999,10 +8012,10 @@ PG::RecoveryCtx OSD::create_context()
C_Contexts *on_safe = new C_Contexts(cct);
map<int, map<spg_t,pg_query_t> > *query_map =
new map<int, map<spg_t, pg_query_t> >;
map<int,vector<pair<pg_notify_t, pg_interval_map_t> > > *notify_list =
new map<int, vector<pair<pg_notify_t, pg_interval_map_t> > >;
map<int,vector<pair<pg_notify_t, pg_interval_map_t> > > *info_map =
new map<int,vector<pair<pg_notify_t, pg_interval_map_t> > >;
map<int,vector<pair<pg_notify_t, PastIntervals> > > *notify_list =
new map<int, vector<pair<pg_notify_t, PastIntervals> > >;
map<int,vector<pair<pg_notify_t, PastIntervals> > > *info_map =
new map<int,vector<pair<pg_notify_t, PastIntervals> > >;
PG::RecoveryCtx rctx(query_map, info_map, notify_list,
on_applied, on_safe, t);
return rctx;
@ -8084,11 +8097,11 @@ void OSD::dispatch_context(PG::RecoveryCtx &ctx, PG *pg, OSDMapRef curmap,
*/
void OSD::do_notifies(
map<int,vector<pair<pg_notify_t,pg_interval_map_t> > >& notify_list,
map<int,vector<pair<pg_notify_t,PastIntervals> > >& notify_list,
OSDMapRef curmap)
{
for (map<int,
vector<pair<pg_notify_t,pg_interval_map_t> > >::iterator it =
vector<pair<pg_notify_t,PastIntervals> > >::iterator it =
notify_list.begin();
it != notify_list.end();
++it) {
@ -8143,11 +8156,11 @@ void OSD::do_queries(map<int, map<spg_t,pg_query_t> >& query_map,
void OSD::do_infos(map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > >& info_map,
vector<pair<pg_notify_t, PastIntervals> > >& info_map,
OSDMapRef curmap)
{
for (map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > >::iterator p =
vector<pair<pg_notify_t, PastIntervals> > >::iterator p =
info_map.begin();
p != info_map.end();
++p) {
@ -8155,7 +8168,7 @@ void OSD::do_infos(map<int,
dout(20) << __func__ << " skipping down osd." << p->first << dendl;
continue;
}
for (vector<pair<pg_notify_t,pg_interval_map_t> >::iterator i = p->second.begin();
for (vector<pair<pg_notify_t,PastIntervals> >::iterator i = p->second.begin();
i != p->second.end();
++i) {
dout(20) << __func__ << " sending info " << i->first.info
@ -8457,7 +8470,7 @@ void OSD::handle_pg_query(OpRequestRef op)
op->mark_started();
map< int, vector<pair<pg_notify_t, pg_interval_map_t> > > notify_list;
map< int, vector<pair<pg_notify_t, PastIntervals> > > notify_list;
for (auto it = m->pg_list.begin();
it != m->pg_list.end();
@ -8541,7 +8554,7 @@ void OSD::handle_pg_query(OpRequestRef op)
it->second.epoch_sent,
osdmap->get_epoch(),
empty),
pg_interval_map_t()));
PastIntervals()));
}
}
do_notifies(notify_list, osdmap);

View File: src/osd/OSD.h

@ -2050,7 +2050,7 @@ protected:
vector<int>& up, int up_primary,
vector<int>& acting, int acting_primary,
pg_history_t history,
const pg_interval_map_t& pi,
const PastIntervals& pi,
ObjectStore::Transaction& t);
PG* _make_pg(OSDMapRef createmap, spg_t pgid);
@ -2060,7 +2060,7 @@ protected:
int handle_pg_peering_evt(
spg_t pgid,
const pg_history_t& orig_history,
const pg_interval_map_t& pi,
const PastIntervals& pi,
epoch_t epoch,
PG::CephPeeringEvtRef evt);
@ -2202,13 +2202,13 @@ protected:
void dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg,
ThreadPool::TPHandle *handle = NULL);
void do_notifies(map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > >&
vector<pair<pg_notify_t, PastIntervals> > >&
notify_list,
OSDMapRef map);
void do_queries(map<int, map<spg_t,pg_query_t> >& query_map,
OSDMapRef map);
void do_infos(map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > >& info_map,
vector<pair<pg_notify_t, PastIntervals> > >& info_map,
OSDMapRef map);
bool require_mon_peer(const Message *m);

View File: src/osd/PG.cc

@ -713,114 +713,53 @@ bool PG::needs_backfill() const
return false;
}
bool PG::_calc_past_interval_range(epoch_t *start, epoch_t *end, epoch_t oldest_map)
void PG::check_past_interval_bounds() const
{
if (info.history.same_interval_since) {
*end = info.history.same_interval_since;
} else {
// PG must be imported, so let's calculate the whole range.
*end = osdmap_ref->get_epoch();
}
// Do we already have the intervals we want?
map<epoch_t,pg_interval_t>::const_iterator pif = past_intervals.begin();
if (pif != past_intervals.end()) {
if (pif->first <= info.history.last_epoch_clean) {
dout(10) << __func__ << ": already have past intervals back to "
<< info.history.last_epoch_clean << dendl;
return false;
}
*end = pif->first;
}
*start = MAX(MAX(info.history.epoch_created,
info.history.last_epoch_clean),
oldest_map);
if (*start >= *end) {
dout(10) << __func__ << " start epoch " << *start << " >= end epoch " << *end
<< ", nothing to do" << dendl;
return false;
}
return true;
}
void PG::generate_past_intervals()
{
epoch_t cur_epoch, end_epoch;
if (!_calc_past_interval_range(&cur_epoch, &end_epoch,
osd->get_superblock().oldest_map)) {
if (info.history.same_interval_since == 0) {
info.history.same_interval_since = end_epoch;
dirty_info = true;
}
return;
}
OSDMapRef last_map, cur_map;
int primary = -1;
int up_primary = -1;
vector<int> acting, up, old_acting, old_up;
cur_map = osd->get_map(cur_epoch);
cur_map->pg_to_up_acting_osds(
get_pgid().pgid, &up, &up_primary, &acting, &primary);
epoch_t same_interval_since = cur_epoch;
dout(10) << __func__ << " over epochs " << cur_epoch << "-"
<< end_epoch << dendl;
++cur_epoch;
for (; cur_epoch <= end_epoch; ++cur_epoch) {
int old_primary = primary;
int old_up_primary = up_primary;
last_map.swap(cur_map);
old_up.swap(up);
old_acting.swap(acting);
cur_map = osd->get_map(cur_epoch);
pg_t pgid = get_pgid().pgid;
if (last_map->get_pools().count(pgid.pool()))
pgid = pgid.get_ancestor(last_map->get_pg_num(pgid.pool()));
cur_map->pg_to_up_acting_osds(pgid, &up, &up_primary, &acting, &primary);
boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
get_is_recoverable_predicate());
std::stringstream debug;
bool new_interval = pg_interval_t::check_new_interval(
old_primary,
primary,
old_acting,
acting,
old_up_primary,
up_primary,
old_up,
up,
same_interval_since,
info.history.last_epoch_clean,
cur_map,
last_map,
pgid,
recoverable.get(),
&past_intervals,
&debug);
if (new_interval) {
dout(10) << debug.str() << dendl;
same_interval_since = cur_epoch;
auto rpib = get_required_past_interval_bounds(
info,
osd->get_superblock().oldest_map);
if (rpib.first == rpib.second) {
if (!past_intervals.empty()) {
osd->clog->error() << info.pgid << " required past_interval bounds are "
<< " empty " << rpib << " but past_intervals is not "
<< past_intervals;
derr << info.pgid << " required past_interval bounds are "
<< " empty " << rpib << " but past_intervals is not "
<< past_intervals << dendl;
assert(past_intervals.empty());
}
}
// PG import needs recalculated same_interval_since
if (info.history.same_interval_since == 0) {
assert(same_interval_since);
dout(10) << __func__ << " fix same_interval_since " << same_interval_since << " pg " << *this << dendl;
dout(10) << __func__ << " past_intervals " << past_intervals << dendl;
// Fix it
info.history.same_interval_since = same_interval_since;
if (past_intervals.empty()) {
osd->clog->error() << info.pgid << " required past_interval bounds are "
<< " not empty " << rpib << " but past_intervals "
<< past_intervals << " is empty";
derr << info.pgid << " required past_interval bounds are "
<< " not empty " << rpib << " but past_intervals "
<< past_intervals << " is empty" << dendl;
assert(!past_intervals.empty());
}
auto apib = past_intervals.get_bounds();
// record our work.
dirty_info = true;
dirty_big_info = true;
if ((apib.first.first > rpib.first) ||
(apib.first.second <= rpib.first)) {
osd->clog->error() << info.pgid << " past_intervals " << apib
<< " start interval does not contain the required"
<< " bound " << rpib << " start";
derr << info.pgid << " past_intervals " << apib
<< " start interval does not contain the required"
<< " bound " << rpib << " start" << dendl;
assert(0 == "past_interval start interval mismatch");
}
if (apib.second != rpib.second) {
osd->clog->error() << info.pgid << " past_interval bound " << apib
<< " end does not match required " << rpib
<< " end";
derr << info.pgid << " past_interval bound " << apib
<< " end does not match required " << rpib
<< " end" << dendl;
assert(0 == "past_interval end mismatch");
}
}
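Concretely, the check above enforces two invariants between the required bounds rpib (start = max(last_epoch_started, epoch_created, oldest_map), end = max(same_interval_since, epoch_created), per get_required_past_interval_bounds() in PG.h) and the actual bounds apib from past_intervals.get_bounds(): the required start must fall inside the first recorded interval, and the two ends must agree. A small self-contained sketch of that invariant with concrete epochs (stand-in types, not the real PG code):

#include <cassert>
#include <utility>

using epoch_t = unsigned;
using bounds_t = std::pair<epoch_t, epoch_t>;                 // rpib: (start, end)
using actual_bounds_t = std::pair<bounds_t, epoch_t>;         // apib: ((s0, s1), end)

// same shape as the checks in PG::check_past_interval_bounds()
static bool bounds_consistent(const bounds_t &rpib, const actual_bounds_t &apib) {
  return apib.first.first <= rpib.first &&   // first recorded interval starts early enough...
         rpib.first < apib.first.second &&   // ...and still covers the required start
         apib.second == rpib.second;         // recorded intervals end exactly at the required end
}

int main() {
  // Suppose the PG recorded intervals [10,19] and [20,29]: get_bounds() == ((10, 20), 30).
  actual_bounds_t apib{{10, 20}, 30};

  // last_epoch_started = 15, epoch_created = 3, oldest_map = 8,
  // same_interval_since = 30  ->  rpib = (max(15,3,8), max(30,3)) = (15, 30)
  assert(bounds_consistent({15, 30}, apib));

  // A required start before the first recorded interval (e.g. 5) would trip the
  // "start interval does not contain the required bound" assert above.
  assert(!bounds_consistent({5, 30}, apib));

  // A mismatched end (e.g. same_interval_since = 35) would trip the end check.
  assert(!bounds_consistent({15, 35}, apib));
  return 0;
}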
/*
@ -830,18 +769,14 @@ void PG::generate_past_intervals()
*/
void PG::trim_past_intervals()
{
std::map<epoch_t,pg_interval_t>::iterator pif = past_intervals.begin();
std::map<epoch_t,pg_interval_t>::iterator end = past_intervals.end();
while (pif != end) {
if (pif->second.last >= info.history.last_epoch_clean)
return;
dout(10) << __func__ << ": trimming " << pif->second << dendl;
past_intervals.erase(pif++);
dirty_big_info = true;
}
past_intervals.trim(
info.history.last_epoch_clean,
[&](const PastIntervals::pg_interval_t &trimmed) {
dout(10) << __func__ << ": trimming " << trimmed << dendl;
dirty_big_info = true;
});
}
bool PG::adjust_need_up_thru(const OSDMapRef osdmap)
{
epoch_t up_thru = osdmap->get_up_thru(osd->whoami);
@ -907,7 +842,7 @@ bool PG::all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const
return true;
}
void PG::build_prior(std::unique_ptr<PriorSet> &prior_set)
PastIntervals::PriorSet PG::build_prior()
{
if (1) {
// sanity check
@ -917,18 +852,16 @@ void PG::build_prior(std::unique_ptr<PriorSet> &prior_set)
assert(info.history.last_epoch_started >= it->second.history.last_epoch_started);
}
}
prior_set.reset(
new PriorSet(
cct,
pool.info.ec_pool(),
get_pgbackend()->get_is_recoverable_predicate(),
*get_osdmap(),
past_intervals,
up,
acting,
info,
this));
PriorSet &prior(*prior_set.get());
PastIntervals::PriorSet prior = past_intervals.get_prior_set(
pool.info.ec_pool(),
get_pgbackend()->get_is_recoverable_predicate(),
*get_osdmap(),
past_intervals,
up,
acting,
info,
this);
if (prior.pg_down) {
state_set(PG_STATE_DOWN);
@ -945,7 +878,8 @@ void PG::build_prior(std::unique_ptr<PriorSet> &prior_set)
<< ", all is well" << dendl;
need_up_thru = false;
}
set_probe_targets(prior_set->probe);
set_probe_targets(prior.probe);
return prior;
}
void PG::clear_primary_state()
@ -1522,32 +1456,11 @@ void PG::build_might_have_unfound()
dout(10) << __func__ << dendl;
// Make sure that we have past intervals.
generate_past_intervals();
check_past_interval_bounds();
// We need to decide who might have unfound objects that we need
std::map<epoch_t,pg_interval_t>::const_reverse_iterator p = past_intervals.rbegin();
std::map<epoch_t,pg_interval_t>::const_reverse_iterator end = past_intervals.rend();
for (; p != end; ++p) {
const pg_interval_t &interval(p->second);
// We already have all the objects that exist at last_epoch_clean,
// so there's no need to look at earlier intervals.
if (interval.last < info.history.last_epoch_clean)
break;
// If nothing changed, we don't care about this interval.
if (!interval.maybe_went_rw)
continue;
int i = 0;
std::vector<int>::const_iterator a = interval.acting.begin();
std::vector<int>::const_iterator a_end = interval.acting.end();
for (; a != a_end; ++a, ++i) {
pg_shard_t shard(*a, pool.info.ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD);
if (*a != CRUSH_ITEM_NONE && shard != pg_whoami)
might_have_unfound.insert(shard);
}
}
might_have_unfound = past_intervals.get_might_have_unfound(
pg_whoami,
pool.info.ec_pool());
// include any (stray) peers
for (map<pg_shard_t, pg_info_t>::iterator p = peer_info.begin();
@ -1576,7 +1489,7 @@ void PG::activate(ObjectStore::Transaction& t,
map<int,
vector<
pair<pg_notify_t,
pg_interval_map_t> > > *activator_map,
PastIntervals> > > *activator_map,
RecoveryCtx *ctx)
{
assert(!is_peered());
@ -1940,7 +1853,7 @@ void PG::_activate_committed(epoch_t epoch, epoch_t activation_epoch)
state_set(PG_STATE_PEERED);
}
m->pg_list.push_back(make_pair(i, pg_interval_map_t()));
m->pg_list.push_back(make_pair(i, PastIntervals()));
osd->send_message_osd_cluster(get_primary().osd, m, get_osdmap()->get_epoch());
// waiters
@ -2787,7 +2700,7 @@ void PG::init(
const vector<int>& newup, int new_up_primary,
const vector<int>& newacting, int new_acting_primary,
const pg_history_t& history,
const pg_interval_map_t& pi,
const PastIntervals& pi,
bool backfill,
ObjectStore::Transaction *t)
{
@ -2886,7 +2799,7 @@ int PG::_prepare_write_info(CephContext* cct,
map<string,bufferlist> *km,
epoch_t epoch,
pg_info_t &info, pg_info_t &last_written_info,
map<epoch_t,pg_interval_t> &past_intervals,
PastIntervals &past_intervals,
bool dirty_big_info,
bool dirty_epoch,
bool try_fast_info,
@ -3210,7 +3123,7 @@ std::string PG::get_corrupt_pg_log_name() const
int PG::read_info(
ObjectStore *store, spg_t pgid, const coll_t &coll, bufferlist &bl,
pg_info_t &info, map<epoch_t,pg_interval_t> &past_intervals,
pg_info_t &info, PastIntervals &past_intervals,
__u8 &struct_v)
{
// try for v8 or later
@ -4876,7 +4789,7 @@ void PG::share_pg_info()
get_osdmap()->get_epoch(),
get_osdmap()->get_epoch(),
info),
pg_interval_map_t()));
PastIntervals()));
osd->send_message_osd_cluster(peer.osd, m, get_osdmap()->get_epoch());
}
}
@ -5045,7 +4958,7 @@ bool PG::should_restart_peering(
OSDMapRef lastmap,
OSDMapRef osdmap)
{
if (pg_interval_t::is_new_interval(
if (PastIntervals::is_new_interval(
primary.osd,
newactingprimary,
acting,
@ -5195,7 +5108,7 @@ void PG::start_peering_interval(
assert(info.history.same_interval_since != 0);
boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
get_is_recoverable_predicate());
bool new_interval = pg_interval_t::check_new_interval(
bool new_interval = PastIntervals::check_new_interval(
old_acting_primary.osd,
new_acting_primary,
oldacting, newacting,
@ -5213,7 +5126,7 @@ void PG::start_peering_interval(
dout(10) << __func__ << ": check_new_interval output: "
<< debug.str() << dendl;
if (new_interval) {
dout(10) << " noting past " << past_intervals.rbegin()->second << dendl;
dout(10) << " noting past " << past_intervals.get_bounds().second << dendl;
dirty_info = true;
dirty_big_info = true;
info.history.same_interval_since = osdmap->get_epoch();
@ -5397,7 +5310,7 @@ ostream& operator<<(ostream& out, const PG& pg)
out << " lpr=" << pg.get_last_peering_reset();
if (!pg.past_intervals.empty()) {
out << " pi=" << pg.past_intervals.begin()->first << "-" << pg.past_intervals.rbegin()->second.last
out << " pi=" << pg.past_intervals.get_bounds()
<< "/" << pg.past_intervals.size();
}
@ -5884,15 +5797,6 @@ void PG::update_store_on_load()
}
}
std::ostream& operator<<(std::ostream& oss,
const struct PG::PriorSet &prior)
{
oss << "PriorSet[probe=" << prior.probe << " "
<< "down=" << prior.down << " "
<< "blocked_by=" << prior.blocked_by << "]";
return oss;
}
/*------------ Recovery State Machine----------------*/
#undef dout_prefix
#define dout_prefix (*_dout << context< RecoveryMachine >().pg->gen_prefix() \
@ -6061,10 +5965,6 @@ boost::statechart::result PG::RecoveryState::Reset::react(const AdvMap& advmap)
PG *pg = context< RecoveryMachine >().pg;
ldout(pg->cct, 10) << "Reset advmap" << dendl;
// make sure we have past_intervals filled in. hopefully this will happen
// _before_ we are active.
pg->generate_past_intervals();
pg->check_full_transition(advmap.lastmap, advmap.osdmap);
if (pg->should_restart_peering(
@ -6083,6 +5983,7 @@ boost::statechart::result PG::RecoveryState::Reset::react(const AdvMap& advmap)
context< RecoveryMachine >().get_cur_transaction());
}
pg->remove_down_peer_info(advmap.osdmap);
pg->check_past_interval_bounds();
return discard_event();
}
@ -6225,7 +6126,7 @@ boost::statechart::result PG::RecoveryState::Peering::react(const AdvMap& advmap
{
PG *pg = context< RecoveryMachine >().pg;
ldout(pg->cct, 10) << "Peering advmap" << dendl;
if (prior_set.get()->affected_by_map(advmap.osdmap, pg)) {
if (prior_set.affected_by_map(*(advmap.osdmap), pg)) {
ldout(pg->cct, 1) << "Peering, affected_by_map, going to Reset" << dendl;
post_event(advmap);
return transit< Reset >();
@ -6245,35 +6146,29 @@ boost::statechart::result PG::RecoveryState::Peering::react(const QueryState& q)
q.f->dump_stream("enter_time") << enter_time;
q.f->open_array_section("past_intervals");
for (map<epoch_t,pg_interval_t>::iterator p = pg->past_intervals.begin();
p != pg->past_intervals.end();
++p) {
q.f->open_object_section("past_interval");
p->second.dump(q.f);
q.f->close_section();
}
pg->past_intervals.dump(q.f);
q.f->close_section();
q.f->open_array_section("probing_osds");
for (set<pg_shard_t>::iterator p = prior_set->probe.begin();
p != prior_set->probe.end();
for (set<pg_shard_t>::iterator p = prior_set.probe.begin();
p != prior_set.probe.end();
++p)
q.f->dump_stream("osd") << *p;
q.f->close_section();
if (prior_set->pg_down)
if (prior_set.pg_down)
q.f->dump_string("blocked", "peering is blocked due to down osds");
q.f->open_array_section("down_osds_we_would_probe");
for (set<int>::iterator p = prior_set->down.begin();
p != prior_set->down.end();
for (set<int>::iterator p = prior_set.down.begin();
p != prior_set.down.end();
++p)
q.f->dump_int("osd", *p);
q.f->close_section();
q.f->open_array_section("peering_blocked_by");
for (map<int,epoch_t>::iterator p = prior_set->blocked_by.begin();
p != prior_set->blocked_by.end();
for (map<int,epoch_t>::iterator p = prior_set.blocked_by.begin();
p != prior_set.blocked_by.end();
++p) {
q.f->open_object_section("osd");
q.f->dump_int("osd", p->first);
@ -7454,17 +7349,16 @@ PG::RecoveryState::GetInfo::GetInfo(my_context ctx)
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
pg->generate_past_intervals();
unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set;
pg->check_past_interval_bounds();
PastIntervals::PriorSet &prior_set = context< Peering >().prior_set;
assert(pg->blocked_by.empty());
if (!prior_set.get())
pg->build_prior(prior_set);
prior_set = pg->build_prior();
pg->reset_min_peer_features();
get_infos();
if (prior_set->pg_down) {
if (prior_set.pg_down) {
post_event(IsDown());
} else if (peer_info_requested.empty()) {
post_event(GotInfo());
@ -7474,11 +7368,11 @@ PG::RecoveryState::GetInfo::GetInfo(my_context ctx)
void PG::RecoveryState::GetInfo::get_infos()
{
PG *pg = context< RecoveryMachine >().pg;
unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set;
PastIntervals::PriorSet &prior_set = context< Peering >().prior_set;
pg->blocked_by.clear();
for (set<pg_shard_t>::const_iterator it = prior_set->probe.begin();
it != prior_set->probe.end();
for (set<pg_shard_t>::const_iterator it = prior_set.probe.begin();
it != prior_set.probe.end();
++it) {
pg_shard_t peer = *it;
if (peer == pg->pg_whoami) {
@ -7522,17 +7416,17 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in
if (pg->proc_replica_info(
infoevt.from, infoevt.notify.info, infoevt.notify.epoch_sent)) {
// we got something new ...
unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set;
PastIntervals::PriorSet &prior_set = context< Peering >().prior_set;
if (old_start < pg->info.history.last_epoch_started) {
ldout(pg->cct, 10) << " last_epoch_started moved forward, rebuilding prior" << dendl;
pg->build_prior(prior_set);
prior_set = pg->build_prior();
// filter out any osds that got dropped from the probe set from
// peer_info_requested. this is less expensive than restarting
// peering (which would re-probe everyone).
set<pg_shard_t>::iterator p = peer_info_requested.begin();
while (p != peer_info_requested.end()) {
if (prior_set->probe.count(*p) == 0) {
if (prior_set.probe.count(*p) == 0) {
ldout(pg->cct, 20) << " dropping osd." << *p << " from info_requested, no longer in probe set" << dendl;
peer_info_requested.erase(p++);
} else {
@ -7801,9 +7695,9 @@ PG::RecoveryState::Down::Down(my_context ctx)
pg->state_clear(PG_STATE_PEERING);
pg->state_set(PG_STATE_DOWN);
unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set;
auto &prior_set = context< Peering >().prior_set;
assert(pg->blocked_by.empty());
pg->blocked_by.insert(prior_set->down.begin(), prior_set->down.end());
pg->blocked_by.insert(prior_set.down.begin(), prior_set.down.end());
pg->publish_stats_to_osd();
}
@ -7842,9 +7736,9 @@ PG::RecoveryState::Incomplete::Incomplete(my_context ctx)
pg->state_clear(PG_STATE_PEERING);
pg->state_set(PG_STATE_INCOMPLETE);
unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set;
PastIntervals::PriorSet &prior_set = context< Peering >().prior_set;
assert(pg->blocked_by.empty());
pg->blocked_by.insert(prior_set->down.begin(), prior_set->down.end());
pg->blocked_by.insert(prior_set.down.begin(), prior_set.down.end());
pg->publish_stats_to_osd();
}
@ -8109,169 +8003,6 @@ void PG::RecoveryState::RecoveryMachine::log_exit(const char *state_name, utime_
#undef dout_prefix
#define dout_prefix (*_dout << (debug_pg ? debug_pg->gen_prefix() : string()) << " PriorSet: ")
PG::PriorSet::PriorSet(CephContext* cct,
bool ec_pool,
IsPGRecoverablePredicate *c,
const OSDMap &osdmap,
const map<epoch_t, pg_interval_t> &past_intervals,
const vector<int> &up,
const vector<int> &acting,
const pg_info_t &info,
const PG *debug_pg)
: cct(cct), ec_pool(ec_pool), pg_down(false), pcontdec(c)
{
// Include current acting and up nodes... not because they may
// contain old data (this interval hasn't gone active, obviously),
// but because we want their pg_info to inform choose_acting(), and
// so that we know what they do/do not have explicitly before
// sending them any new info/logs/whatever.
for (unsigned i = 0; i < acting.size(); i++) {
if (acting[i] != CRUSH_ITEM_NONE)
probe.insert(pg_shard_t(acting[i], ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
}
// It may be possible to exclude the up nodes, but let's keep them in
// there for now.
for (unsigned i = 0; i < up.size(); i++) {
if (up[i] != CRUSH_ITEM_NONE)
probe.insert(pg_shard_t(up[i], ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
}
for (map<epoch_t,pg_interval_t>::const_reverse_iterator p = past_intervals.rbegin();
p != past_intervals.rend();
++p) {
const pg_interval_t &interval = p->second;
dout(10) << "build_prior " << interval << dendl;
if (interval.last < info.history.last_epoch_started)
break; // we don't care
if (interval.acting.empty())
continue;
if (!interval.maybe_went_rw)
continue;
// look at candidate osds during this interval. each falls into
// one of three categories: up, down (but potentially
// interesting), or lost (down, but we won't wait for it).
set<pg_shard_t> up_now;
bool any_down_now = false; // any candidates down now (that might have useful data)
// consider ACTING osds
for (unsigned i = 0; i < interval.acting.size(); i++) {
int o = interval.acting[i];
if (o == CRUSH_ITEM_NONE)
continue;
pg_shard_t so(o, ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD);
const osd_info_t *pinfo = 0;
if (osdmap.exists(o))
pinfo = &osdmap.get_info(o);
if (osdmap.is_up(o)) {
// include past acting osds if they are up.
probe.insert(so);
up_now.insert(so);
} else if (!pinfo) {
dout(10) << "build_prior prior osd." << o << " no longer exists" << dendl;
down.insert(o);
} else if (pinfo->lost_at > interval.first) {
dout(10) << "build_prior prior osd." << o << " is down, but lost_at " << pinfo->lost_at << dendl;
up_now.insert(so);
down.insert(o);
} else {
dout(10) << "build_prior prior osd." << o << " is down" << dendl;
down.insert(o);
any_down_now = true;
}
}
// if not enough osds survived this interval, and we may have gone rw,
// then we need to wait for one of those osds to recover to
// ensure that we haven't lost any information.
if (!(*pcontdec)(up_now) && any_down_now) {
// fixme: how do we identify a "clean" shutdown anyway?
dout(10) << "build_prior possibly went active+rw, insufficient up;"
<< " including down osds" << dendl;
for (vector<int>::const_iterator i = interval.acting.begin();
i != interval.acting.end();
++i) {
if (osdmap.exists(*i) && // if it doesn't exist, we already consider it lost.
osdmap.is_down(*i)) {
pg_down = true;
// make note of when any down osd in the cur set was lost, so that
// we can notice changes in prior_set_affected.
blocked_by[*i] = osdmap.get_info(*i).lost_at;
}
}
}
}
dout(10) << "build_prior final: probe " << probe
<< " down " << down
<< " blocked_by " << blocked_by
<< (pg_down ? " pg_down":"")
<< dendl;
}
// true if the given map affects the prior set
bool PG::PriorSet::affected_by_map(const OSDMapRef osdmap, const PG *debug_pg) const
{
for (set<pg_shard_t>::iterator p = probe.begin();
p != probe.end();
++p) {
int o = p->osd;
// did someone in the prior set go down?
if (osdmap->is_down(o) && down.count(o) == 0) {
dout(10) << "affected_by_map osd." << o << " now down" << dendl;
return true;
}
// did a down osd in cur get (re)marked as lost?
map<int, epoch_t>::const_iterator r = blocked_by.find(o);
if (r != blocked_by.end()) {
if (!osdmap->exists(o)) {
dout(10) << "affected_by_map osd." << o << " no longer exists" << dendl;
return true;
}
if (osdmap->get_info(o).lost_at != r->second) {
dout(10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
return true;
}
}
}
// did someone in the prior down set go up?
for (set<int>::const_iterator p = down.begin();
p != down.end();
++p) {
int o = *p;
if (osdmap->is_up(o)) {
dout(10) << "affected_by_map osd." << o << " now up" << dendl;
return true;
}
// did someone in the prior set get lost or destroyed?
if (!osdmap->exists(o)) {
dout(10) << "affected_by_map osd." << o << " no longer exists" << dendl;
return true;
}
// did a down osd in down get (re)marked as lost?
map<int, epoch_t>::const_iterator r = blocked_by.find(o);
if (r != blocked_by.end()) {
if (osdmap->get_info(o).lost_at != r->second) {
dout(10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
return true;
}
}
}
return false;
}
void PG::RecoveryState::start_handle(RecoveryCtx *new_ctx) {
assert(!rctx);
assert(!orig_ctx);

View File: src/osd/PG.h

@ -194,7 +194,7 @@ struct PGPool {
*
*/
class PG : protected DoutPrefixProvider {
class PG : public DoutPrefixProvider {
protected:
OSDService *osd;
CephContext *cct;
@ -491,7 +491,7 @@ public:
}
} missing_loc;
map<epoch_t,pg_interval_t> past_intervals;
PastIntervals past_intervals;
interval_set<snapid_t> snap_trimq;
@ -551,45 +551,19 @@ public:
set<int> blocked_by; ///< osds we are blocked by (for pg stats)
// [primary only] content recovery state
protected:
struct PriorSet {
CephContext* cct;
const bool ec_pool;
set<pg_shard_t> probe; /// current+prior OSDs we need to probe.
set<int> down; /// down osds that would normally be in @a probe and might be interesting.
map<int, epoch_t> blocked_by; /// current lost_at values for any OSDs in cur set for which (re)marking them lost would affect cur set
bool pg_down; /// some down osds are included in @a cur; the DOWN pg state bit should be set.
boost::scoped_ptr<IsPGRecoverablePredicate> pcontdec;
PriorSet(CephContext* cct,
bool ec_pool,
IsPGRecoverablePredicate *c,
const OSDMap &osdmap,
const map<epoch_t, pg_interval_t> &past_intervals,
const vector<int> &up,
const vector<int> &acting,
const pg_info_t &info,
const PG *debug_pg = nullptr);
bool affected_by_map(const OSDMapRef osdmap, const PG *debug_pg=0) const;
};
friend std::ostream& operator<<(std::ostream& oss,
const struct PriorSet &prior);
public:
struct BufferedRecoveryMessages {
map<int, map<spg_t, pg_query_t> > query_map;
map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > info_map;
map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > notify_list;
map<int, vector<pair<pg_notify_t, PastIntervals> > > info_map;
map<int, vector<pair<pg_notify_t, PastIntervals> > > notify_list;
};
struct RecoveryCtx {
utime_t start_time;
map<int, map<spg_t, pg_query_t> > *query_map;
map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > *info_map;
map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > *notify_list;
map<int, vector<pair<pg_notify_t, PastIntervals> > > *info_map;
map<int, vector<pair<pg_notify_t, PastIntervals> > > *notify_list;
set<PGRef> created_pgs;
C_Contexts *on_applied;
C_Contexts *on_safe;
@ -597,9 +571,9 @@ public:
ThreadPool::TPHandle* handle;
RecoveryCtx(map<int, map<spg_t, pg_query_t> > *query_map,
map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > > *info_map,
vector<pair<pg_notify_t, PastIntervals> > > *info_map,
map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > > *notify_list,
vector<pair<pg_notify_t, PastIntervals> > > *notify_list,
C_Contexts *on_applied,
C_Contexts *on_safe,
ObjectStore::Transaction *transaction)
@ -633,20 +607,20 @@ public:
omap[j->first] = j->second;
}
}
for (map<int, vector<pair<pg_notify_t, pg_interval_map_t> > >::iterator i
for (map<int, vector<pair<pg_notify_t, PastIntervals> > >::iterator i
= m.info_map.begin();
i != m.info_map.end();
++i) {
vector<pair<pg_notify_t, pg_interval_map_t> > &ovec =
vector<pair<pg_notify_t, PastIntervals> > &ovec =
(*info_map)[i->first];
ovec.reserve(ovec.size() + i->second.size());
ovec.insert(ovec.end(), i->second.begin(), i->second.end());
}
for (map<int, vector<pair<pg_notify_t, pg_interval_map_t> > >::iterator i
for (map<int, vector<pair<pg_notify_t, PastIntervals> > >::iterator i
= m.notify_list.begin();
i != m.notify_list.end();
++i) {
vector<pair<pg_notify_t, pg_interval_map_t> > &ovec =
vector<pair<pg_notify_t, PastIntervals> > &ovec =
(*notify_list)[i->first];
ovec.reserve(ovec.size() + i->second.size());
ovec.insert(ovec.end(), i->second.begin(), i->second.end());
@ -933,10 +907,24 @@ public:
void mark_clean(); ///< mark an active pg clean
bool _calc_past_interval_range(epoch_t *start, epoch_t *end, epoch_t oldest_map);
void generate_past_intervals();
static pair<epoch_t, epoch_t> get_required_past_interval_bounds(
const pg_info_t &info,
epoch_t oldest_map) {
epoch_t start = MAX(
info.history.last_epoch_started,
MAX(oldest_map, info.history.epoch_created));
epoch_t end = MAX(
info.history.same_interval_since,
info.history.epoch_created);
if (start == end) {
return make_pair(0, 0);
} else {
return make_pair(start, end);
}
}
void check_past_interval_bounds() const;
void trim_past_intervals();
void build_prior(std::unique_ptr<PriorSet> &prior_set);
PastIntervals::PriorSet build_prior();
void remove_down_peer_info(const OSDMapRef osdmap);
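get_required_past_interval_bounds() is a pure function of the PG's history, and it collapses to (0, 0), meaning "no past intervals required", whenever the computed start catches up with the end; that degenerate result is exactly what check_past_interval_bounds() and OSD::build_past_intervals_parallel() test for before skipping the work. A standalone restatement with concrete numbers (simplified stand-in types only):

#include <algorithm>
#include <cassert>
#include <utility>

using epoch_t = unsigned;

struct history_t {                       // only the fields the helper looks at
  epoch_t last_epoch_started = 0;
  epoch_t epoch_created = 0;
  epoch_t same_interval_since = 0;
};

// same computation as PG::get_required_past_interval_bounds()
static std::pair<epoch_t, epoch_t> required_bounds(const history_t &h, epoch_t oldest_map) {
  epoch_t start = std::max({h.last_epoch_started, oldest_map, h.epoch_created});
  epoch_t end = std::max(h.same_interval_since, h.epoch_created);
  return start == end ? std::make_pair(0u, 0u) : std::make_pair(start, end);
}

int main() {
  // Normal case: intervals from epoch 15 (last started) up to 30 are required.
  assert(required_bounds({15, 3, 30}, 8) == std::make_pair(15u, 30u));

  // Degenerate case: the PG went clean in (or was created in) the current
  // interval, so start == end and no past intervals are required at all.
  assert(required_bounds({30, 3, 30}, 8) == std::make_pair(0u, 0u));
  return 0;
}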
@ -1064,7 +1052,7 @@ public:
list<Context*>& tfin,
map<int, map<spg_t,pg_query_t> >& query_map,
map<int,
vector<pair<pg_notify_t, pg_interval_map_t> > > *activator_map,
vector<pair<pg_notify_t, PastIntervals> > > *activator_map,
RecoveryCtx *ctx);
void _activate_committed(epoch_t epoch, epoch_t activation_epoch);
void all_activated_and_committed();
@ -1573,7 +1561,7 @@ public:
return state->rctx->query_map;
}
map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > *get_info_map() {
map<int, vector<pair<pg_notify_t, PastIntervals> > > *get_info_map() {
assert(state->rctx);
assert(state->rctx->info_map);
return state->rctx->info_map;
@ -1594,7 +1582,7 @@ public:
RecoveryCtx *get_recovery_ctx() { return &*(state->rctx); }
void send_notify(pg_shard_t to,
const pg_notify_t &info, const pg_interval_map_t &pi) {
const pg_notify_t &info, const PastIntervals &pi) {
assert(state->rctx);
assert(state->rctx->notify_list);
(*state->rctx->notify_list)[to.osd].push_back(make_pair(info, pi));
@ -1744,7 +1732,7 @@ public:
struct Active;
struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
std::unique_ptr< PriorSet > prior_set;
PastIntervals::PriorSet prior_set;
bool history_les_bound; //< need osd_find_best_info_ignore_history_les
explicit Peering(my_context ctx);
@ -2242,7 +2230,7 @@ public:
const vector<int>& acting,
int acting_primary,
const pg_history_t& history,
const pg_interval_map_t& pim,
const PastIntervals& pim,
bool backfill,
ObjectStore::Transaction *t);
@ -2266,7 +2254,7 @@ public:
epoch_t epoch,
pg_info_t &info,
pg_info_t &last_written_info,
map<epoch_t,pg_interval_t> &past_intervals,
PastIntervals &past_intervals,
bool dirty_big_info,
bool dirty_epoch,
bool try_fast_info,
@ -2303,7 +2291,7 @@ public:
std::string get_corrupt_pg_log_name() const;
static int read_info(
ObjectStore *store, spg_t pgid, const coll_t &coll,
bufferlist &bl, pg_info_t &info, map<epoch_t,pg_interval_t> &past_intervals,
bufferlist &bl, pg_info_t &info, PastIntervals &past_intervals,
__u8 &);
void read_state(ObjectStore *store, bufferlist &bl);
static bool _has_removal_flag(ObjectStore *store, spg_t pgid);

View File: src/osd/osd_types.cc

@ -2792,7 +2792,7 @@ ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
// -- pg_interval_t --
void pg_interval_t::encode(bufferlist& bl) const
void PastIntervals::pg_interval_t::encode(bufferlist& bl) const
{
ENCODE_START(4, 2, bl);
::encode(first, bl);
@ -2805,7 +2805,7 @@ void pg_interval_t::encode(bufferlist& bl) const
ENCODE_FINISH(bl);
}
void pg_interval_t::decode(bufferlist::iterator& bl)
void PastIntervals::pg_interval_t::decode(bufferlist::iterator& bl)
{
DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
::decode(first, bl);
@ -2828,7 +2828,7 @@ void pg_interval_t::decode(bufferlist::iterator& bl)
DECODE_FINISH(bl);
}
void pg_interval_t::dump(Formatter *f) const
void PastIntervals::pg_interval_t::dump(Formatter *f) const
{
f->dump_unsigned("first", first);
f->dump_unsigned("last", last);
@ -2845,7 +2845,7 @@ void pg_interval_t::dump(Formatter *f) const
f->dump_int("up_primary", up_primary);
}
void pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
{
o.push_back(new pg_interval_t);
o.push_back(new pg_interval_t);
@ -2857,7 +2857,155 @@ void pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
o.back()->maybe_went_rw = true;
}
bool pg_interval_t::is_new_interval(
void PastIntervals::encode(bufferlist &bl) const
{
::encode(past_intervals, bl);
}
void PastIntervals::decode(bufferlist::iterator &bl)
{
::decode(past_intervals, bl);
}
void PastIntervals::dump(Formatter *f) const
{
f->open_array_section("PastIntervals");
for (auto &&i: past_intervals) {
f->open_object_section("pg_interval_t");
f->dump_int("epoch", i.first);
f->open_object_section("interval");
i.second.dump(f);
f->close_section();
f->close_section();
}
f->close_section();
}
void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
{
/* todo */
return;
}
ostream& operator<<(ostream& out, const PastIntervals &i)
{
return out << i.past_intervals;
}
set<pg_shard_t> PastIntervals::get_might_have_unfound(
pg_shard_t pg_whoami,
bool ec_pool) const
{
set<pg_shard_t> might_have_unfound;
// We need to decide who might have unfound objects that we need
PastIntervals::const_reverse_iterator p = past_intervals.rbegin();
PastIntervals::const_reverse_iterator end = past_intervals.rend();
for (; p != end; ++p) {
const PastIntervals::pg_interval_t &interval(p->second);
// If nothing changed, we don't care about this interval.
if (!interval.maybe_went_rw)
continue;
int i = 0;
std::vector<int>::const_iterator a = interval.acting.begin();
std::vector<int>::const_iterator a_end = interval.acting.end();
for (; a != a_end; ++a, ++i) {
pg_shard_t shard(*a, ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD);
if (*a != CRUSH_ITEM_NONE && shard != pg_whoami)
might_have_unfound.insert(shard);
}
}
return might_have_unfound;
}
bool PastIntervals::has_crashed_interval_since(
epoch_t since,
const vector<int> &acting,
const vector<int> &up,
const OSDMap &osdmap,
const DoutPrefixProvider *dpp) const {
bool crashed = false;
for (PastIntervals::const_reverse_iterator p = past_intervals.rbegin();
p != past_intervals.rend();
++p) {
const PastIntervals::pg_interval_t &interval = p->second;
ldpp_dout(dpp,10) << "may_need_replay " << interval << dendl;
if (interval.last < since)
break; // we don't care
if (interval.acting.empty())
continue;
if (!interval.maybe_went_rw)
continue;
// look at whether any of the osds during this interval survived
// past the end of the interval (i.e., didn't crash and
// potentially fail to COMMIT a write that it ACKed).
bool any_survived_interval = false;
// consider ACTING osds
for (unsigned i=0; i<interval.acting.size(); i++) {
int o = interval.acting[i];
if (o == CRUSH_ITEM_NONE)
continue;
const osd_info_t *pinfo = 0;
if (osdmap.exists(o))
pinfo = &osdmap.get_info(o);
// does this osd appear to have survived through the end of the
// interval?
if (pinfo) {
if (pinfo->up_from <= interval.first && pinfo->up_thru > interval.last) {
ldpp_dout(dpp, 10) << "may_need_replay osd." << o
<< " up_from " << pinfo->up_from
<< " up_thru " << pinfo->up_thru
<< " survived the interval" << dendl;
any_survived_interval = true;
}
else if (pinfo->up_from <= interval.first &&
(std::find(acting.begin(), acting.end(), o) != acting.end() ||
std::find(up.begin(), up.end(), o) != up.end())) {
ldpp_dout(dpp, 10) << "may_need_replay osd." << o
<< " up_from " << pinfo->up_from
<< " and is in acting|up,"
<< " assumed to have survived the interval"
<< dendl;
// (if it hasn't, we will rebuild PriorSet)
any_survived_interval = true;
}
else if (pinfo->up_from > interval.last &&
pinfo->last_clean_begin <= interval.first &&
pinfo->last_clean_end > interval.last) {
ldpp_dout(dpp, 10) << "may_need_replay prior osd." << o
<< " up_from " << pinfo->up_from
<< " and last clean interval ["
<< pinfo->last_clean_begin << ","
<< pinfo->last_clean_end
<< ") survived the interval" << dendl;
any_survived_interval = true;
}
}
}
if (!any_survived_interval) {
ldpp_dout(dpp, 3) << "may_need_replay no known survivors of interval "
<< interval.first << "-" << interval.last
<< ", may need replay" << dendl;
crashed = true;
break;
}
}
return crashed;
}
bool PastIntervals::is_new_interval(
int old_acting_primary,
int new_acting_primary,
const vector<int> &old_acting,
@ -2885,7 +3033,7 @@ bool pg_interval_t::is_new_interval(
old_sort_bitwise != new_sort_bitwise;
}
bool pg_interval_t::is_new_interval(
bool PastIntervals::is_new_interval(
int old_acting_primary,
int new_acting_primary,
const vector<int> &old_acting,
@ -2917,7 +3065,7 @@ bool pg_interval_t::is_new_interval(
pgid);
}
bool pg_interval_t::check_new_interval(
bool PastIntervals::check_new_interval(
int old_acting_primary,
int new_acting_primary,
const vector<int> &old_acting,
@ -2932,7 +3080,7 @@ bool pg_interval_t::check_new_interval(
OSDMapRef lastmap,
pg_t pgid,
IsPGRecoverablePredicate *could_have_gone_active,
map<epoch_t, pg_interval_t> *past_intervals,
PastIntervals *past_intervals,
std::ostream *out)
{
/*
@ -2992,7 +3140,7 @@ bool pg_interval_t::check_new_interval(
osdmap,
lastmap,
pgid)) {
pg_interval_t& i = (*past_intervals)[same_interval_since];
pg_interval_t& i = past_intervals->past_intervals[same_interval_since];
i.first = same_interval_since;
i.last = osdmap->get_epoch() - 1;
assert(i.first <= i.last);
@ -3067,7 +3215,224 @@ bool pg_interval_t::check_new_interval(
}
}
ostream& operator<<(ostream& out, const pg_interval_t& i)
PastIntervals::PriorSet::PriorSet(
bool ec_pool,
IsPGRecoverablePredicate *c,
const OSDMap &osdmap,
const PastIntervals &past_intervals,
const vector<int> &up,
const vector<int> &acting,
const pg_info_t &info,
const DoutPrefixProvider *dpp)
: ec_pool(ec_pool), pg_down(false), pcontdec(c)
{
/*
* We have to be careful to gracefully deal with situations like
* so. Say we have a power outage or something that takes out both
* OSDs, but the monitor doesn't mark them down in the same epoch.
* The history may look like
*
* 1: A B
* 2: B
* 3: let's say B dies for good, too (say, from the power spike)
* 4: A
*
* which makes it look like B may have applied updates to the PG
* that we need in order to proceed. This sucks...
*
* To minimize the risk of this happening, we CANNOT go active if
* _any_ OSDs in the prior set are down until we send an MOSDAlive
* to the monitor such that the OSDMap sets osd_up_thru to an epoch.
* Then, we have something like
*
* 1: A B
* 2: B up_thru[B]=0
* 3:
* 4: A
*
* -> we can ignore B, bc it couldn't have gone active (alive_thru
* still 0).
*
* or,
*
* 1: A B
* 2: B up_thru[B]=0
* 3: B up_thru[B]=2
* 4:
* 5: A
*
* -> we must wait for B, bc it was alive through 2, and could have
* written to the pg.
*
* If B is really dead, then an administrator will need to manually
* intervene by marking the OSD as "lost."
*/
// Include current acting and up nodes... not because they may
// contain old data (this interval hasn't gone active, obviously),
// but because we want their pg_info to inform choose_acting(), and
// so that we know what they do/do not have explicitly before
// sending them any new info/logs/whatever.
for (unsigned i = 0; i < acting.size(); i++) {
if (acting[i] != CRUSH_ITEM_NONE)
probe.insert(pg_shard_t(acting[i], ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
}
// It may be possible to exclude the up nodes, but let's keep them in
// there for now.
for (unsigned i = 0; i < up.size(); i++) {
if (up[i] != CRUSH_ITEM_NONE)
probe.insert(pg_shard_t(up[i], ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
}
for (PastIntervals::const_reverse_iterator p = past_intervals.rbegin();
p != past_intervals.rend();
++p) {
const PastIntervals::pg_interval_t &interval = p->second;
ldpp_dout(dpp, 10) << "build_prior " << interval << dendl;
if (interval.last < info.history.last_epoch_started)
break; // we don't care
if (interval.acting.empty())
continue;
if (!interval.maybe_went_rw)
continue;
// look at candidate osds during this interval. each falls into
// one of three categories: up, down (but potentially
// interesting), or lost (down, but we won't wait for it).
set<pg_shard_t> up_now;
bool any_down_now = false; // any candidates down now (that might have useful data)
// consider ACTING osds
for (unsigned i = 0; i < interval.acting.size(); i++) {
int o = interval.acting[i];
if (o == CRUSH_ITEM_NONE)
continue;
pg_shard_t so(o, ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD);
const osd_info_t *pinfo = 0;
if (osdmap.exists(o))
pinfo = &osdmap.get_info(o);
if (osdmap.is_up(o)) {
// include past acting osds if they are up.
probe.insert(so);
up_now.insert(so);
} else if (!pinfo) {
ldpp_dout(dpp, 10) << "build_prior prior osd." << o << " no longer exists" << dendl;
down.insert(o);
} else if (pinfo->lost_at > interval.first) {
ldpp_dout(dpp, 10) << "build_prior prior osd." << o << " is down, but lost_at " << pinfo->lost_at << dendl;
up_now.insert(so);
down.insert(o);
} else {
ldpp_dout(dpp, 10) << "build_prior prior osd." << o << " is down" << dendl;
down.insert(o);
any_down_now = true;
}
}
// if not enough osds survived this interval, and we may have gone rw,
// then we need to wait for one of those osds to recover to
// ensure that we haven't lost any information.
if (!(*pcontdec)(up_now) && any_down_now) {
// fixme: how do we identify a "clean" shutdown anyway?
ldpp_dout(dpp, 10) << "build_prior possibly went active+rw, insufficient up;"
<< " including down osds" << dendl;
for (vector<int>::const_iterator i = interval.acting.begin();
i != interval.acting.end();
++i) {
if (osdmap.exists(*i) && // if it doesn't exist, we already consider it lost.
osdmap.is_down(*i)) {
pg_down = true;
// make note of when any down osd in the cur set was lost, so that
// we can notice changes in prior_set_affected.
blocked_by[*i] = osdmap.get_info(*i).lost_at;
}
}
}
}
ldpp_dout(dpp, 10) << "build_prior final: probe " << probe
<< " down " << down
<< " blocked_by " << blocked_by
<< (pg_down ? " pg_down":"")
<< dendl;
}
// true if the given map affects the prior set
bool PastIntervals::PriorSet::affected_by_map(
const OSDMap &osdmap,
const DoutPrefixProvider *dpp) const
{
for (set<pg_shard_t>::iterator p = probe.begin();
p != probe.end();
++p) {
int o = p->osd;
// did someone in the prior set go down?
if (osdmap.is_down(o) && down.count(o) == 0) {
ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
return true;
}
// did a down osd in cur get (re)marked as lost?
map<int, epoch_t>::const_iterator r = blocked_by.find(o);
if (r != blocked_by.end()) {
if (!osdmap.exists(o)) {
ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
return true;
}
if (osdmap.get_info(o).lost_at != r->second) {
ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
return true;
}
}
}
// did someone in the prior down set go up?
for (set<int>::const_iterator p = down.begin();
p != down.end();
++p) {
int o = *p;
if (osdmap.is_up(o)) {
ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
return true;
}
// did someone in the prior set get lost or destroyed?
if (!osdmap.exists(o)) {
ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
return true;
}
// did a down osd in down get (re)marked as lost?
map<int, epoch_t>::const_iterator r = blocked_by.find(o);
if (r != blocked_by.end()) {
if (osdmap.get_info(o).lost_at != r->second) {
ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
return true;
}
}
}
return false;
}
std::ostream& operator<<(
std::ostream& oss,
const PastIntervals::PriorSet &prior)
{
oss << "PriorSet[probe=" << prior.probe << " "
<< "down=" << prior.down << " "
<< "blocked_by=" << prior.blocked_by << "]";
return oss;
}
ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
{
out << "interval(" << i.first << "-" << i.last
<< " up " << i.up << "(" << i.up_primary << ")"

View File: src/osd/osd_types.h

@ -2471,28 +2471,40 @@ WRITE_CLASS_ENCODER(pg_notify_t)
ostream &operator<<(ostream &lhs, const pg_notify_t &notify);
/**
* pg_interval_t - information about a past interval
*/
class OSDMap;
struct pg_interval_t {
vector<int32_t> up, acting;
epoch_t first, last;
bool maybe_went_rw;
int32_t primary;
int32_t up_primary;
/**
* PastIntervals -- information needed to determine the PriorSet and
* the might_have_unfound set
*/
class PastIntervals {
public:
struct pg_interval_t {
vector<int32_t> up, acting;
epoch_t first, last;
bool maybe_went_rw;
int32_t primary;
int32_t up_primary;
pg_interval_t()
: first(0), last(0),
maybe_went_rw(false),
primary(-1),
up_primary(-1)
{}
pg_interval_t()
: first(0), last(0),
maybe_went_rw(false),
primary(-1),
up_primary(-1)
{}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
void dump(Formatter *f) const;
static void generate_test_instances(list<pg_interval_t*>& o);
};
private:
map<epoch_t, pg_interval_t> past_intervals;
public:
void encode(bufferlist &bl) const;
void decode(bufferlist::iterator &bl);
void dump(Formatter *f) const;
static void generate_test_instances(list<pg_interval_t*>& o);
static void generate_test_instances(list<PastIntervals *> & o);
/**
* Determines whether there is an interval change
@ -2549,20 +2561,133 @@ struct pg_interval_t {
const vector<int> &new_up, ///< [in] up as of osdmap
epoch_t same_interval_since, ///< [in] as of osdmap
epoch_t last_epoch_clean, ///< [in] current
ceph::shared_ptr<const OSDMap> osdmap, ///< [in] current map
ceph::shared_ptr<const OSDMap> lastmap, ///< [in] last map
ceph::shared_ptr<const OSDMap> osdmap, ///< [in] current map
ceph::shared_ptr<const OSDMap> lastmap, ///< [in] last map
pg_t pgid, ///< [in] pgid for pg
IsPGRecoverablePredicate *could_have_gone_active, /// [in] predicate whether the pg can be active
map<epoch_t, pg_interval_t> *past_intervals,///< [out] intervals
PastIntervals *past_intervals, ///< [out] intervals
ostream *out = 0 ///< [out] debug ostream
);
friend ostream& operator<<(ostream& out, const PastIntervals &i);
template <typename F>
void trim(epoch_t bound, F &&f) {
PastIntervals::iterator pif = past_intervals.begin();
PastIntervals::iterator end = past_intervals.end();
while (pif != end) {
if (pif->second.last >= bound)
return;
f(pif->second);
past_intervals.erase(pif++);
}
}
void clear() { past_intervals.clear(); }
/**
* Should return a value which gives an indication of the amount
* of state contained
*/
size_t size() const { return past_intervals.size(); }
bool empty() const { return past_intervals.empty(); }
void swap(PastIntervals &other) {
::swap(other.past_intervals, past_intervals);
}
/**
* Return all osds which have been in the acting set back to the
* latest epoch to which we have trimmed
*/
set<pg_shard_t> get_might_have_unfound(
pg_shard_t pg_whoami,
bool ec_pool) const;
bool has_crashed_interval_since(
epoch_t since,
const vector<int> &acting,
const vector<int> &up,
const OSDMap& osdmap,
const DoutPrefixProvider *dpp) const;
/* Return the set of epochs
* [(start_interval_start, start_interval_end), end) represented by the
* past_interval set. Annoyingly, pg_info_t records last_epoch_started,
* but doesn't pin it to the start of the interval, so we have to return
* the first interval so a user can verify that last_epoch_started falls
* within it */
pair<pair<epoch_t, epoch_t>, epoch_t> get_bounds() const {
auto iter = past_intervals.begin();
if (iter != past_intervals.end()) {
auto riter = past_intervals.rbegin();
return make_pair(
make_pair(iter->second.first, iter->second.last + 1),
riter->second.last + 1);
} else {
assert(0 == "get_bounds only valid if !empty()");
return make_pair(make_pair(0, 0), 0);
}
}
struct PriorSet {
bool ec_pool = false;
set<pg_shard_t> probe; /// current+prior OSDs we need to probe.
set<int> down; /// down osds that would normally be in @a probe and might be interesting.
map<int, epoch_t> blocked_by; /// current lost_at values for any OSDs in the probe set for which (re)marking them lost would change the prior set
bool pg_down = false; /// some down osds are included in @a probe; the DOWN pg state bit should be set.
unique_ptr<IsPGRecoverablePredicate> pcontdec;
PriorSet() = default;
PriorSet(PriorSet &&) = default;
PriorSet &operator=(PriorSet &&) = default;
PriorSet &operator=(const PriorSet &) = delete;
PriorSet(const PriorSet &) = delete;
bool affected_by_map(
const OSDMap &osdmap,
const DoutPrefixProvider *dpp) const;
private:
PriorSet(
bool ec_pool,
IsPGRecoverablePredicate *c,
const OSDMap &osdmap,
const PastIntervals &past_intervals,
const vector<int> &up,
const vector<int> &acting,
const pg_info_t &info,
const DoutPrefixProvider *dpp);
friend class PastIntervals;
};
template <typename... Args>
PriorSet get_prior_set(Args&&... args) const {
return PriorSet(std::forward<Args>(args)...);
}
private:
using iterator = map<epoch_t, pg_interval_t>::iterator;
using const_iterator = map<epoch_t, pg_interval_t>::const_iterator;
using reverse_iterator = map<epoch_t, pg_interval_t>::reverse_iterator;
using const_reverse_iterator =
map<epoch_t, pg_interval_t>::const_reverse_iterator;
iterator begin() { return past_intervals.begin(); }
iterator end() { return past_intervals.end(); }
const_iterator begin() const { return past_intervals.begin(); }
const_iterator end() const { return past_intervals.end(); }
reverse_iterator rbegin() { return past_intervals.rbegin(); }
reverse_iterator rend() { return past_intervals.rend(); }
const_reverse_iterator rbegin() const { return past_intervals.rbegin(); }
const_reverse_iterator rend() const { return past_intervals.rend(); }
};
WRITE_CLASS_ENCODER(pg_interval_t)
ostream& operator<<(ostream& out, const pg_interval_t& i);
typedef map<epoch_t, pg_interval_t> pg_interval_map_t;
WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
WRITE_CLASS_ENCODER(PastIntervals)
ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i);
ostream& operator<<(ostream& out, const PastIntervals &i);
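Reviewer note on the interface above: trim() and get_bounds() are easiest to read from a caller's perspective. The following is a minimal sketch assuming the usual osd_types.h environment; prune_past_intervals and its arguments are illustrative names, not code from this commit.

// Illustrative maintenance helper (not part of this commit); only trim(),
// empty() and get_bounds() are taken from the class declared above.
size_t prune_past_intervals(PastIntervals &pi, epoch_t last_epoch_clean)
{
  size_t trimmed = 0;
  // trim() walks intervals in ascending epoch order, erases every interval
  // whose 'last' epoch is below the bound, and invokes the functor on each
  // interval it drops; it stops at the first interval that is still relevant.
  pi.trim(last_epoch_clean, [&](PastIntervals::pg_interval_t &) { ++trimmed; });

  if (!pi.empty()) {
    // get_bounds() returns ((first.first, first.last + 1), last.last + 1);
    // the oldest interval is reported on its own so a caller can verify that
    // pg_info_t::last_epoch_started falls inside it.
    auto bounds = pi.get_bounds();
    assert(bounds.first.second <= bounds.second);  // intervals are ordered
  }
  return trimmed;
}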
/**
* pg_query_t - used to ask a peer for information about a pg.

View File

@ -2760,7 +2760,7 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
bool sort_bitwise = osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE);
unsigned prev_seed = ceph_stable_mod(pgid.ps(), t->pg_num, t->pg_num_mask);
pg_t prev_pgid(prev_seed, pgid.pool());
if (any_change && pg_interval_t::is_new_interval(
if (any_change && PastIntervals::is_new_interval(
t->acting_primary,
acting_primary,
t->acting,

View File

@ -67,7 +67,7 @@ TYPE(pg_stat_t)
TYPE_FEATUREFUL(pool_stat_t)
TYPE(pg_history_t)
TYPE(pg_info_t)
TYPE(pg_interval_t)
TYPE(PastIntervals::pg_interval_t)
TYPE_FEATUREFUL(pg_query_t)
TYPE(pg_log_entry_t)
TYPE(pg_log_t)

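Reviewer note: the TYPE(PastIntervals::pg_interval_t) registration above feeds ceph-dencoder; for readers unfamiliar with the WRITE_CLASS_ENCODER plumbing, the sketch below shows the kind of round trip it enables. It is illustrative only; roundtrip() is not a helper in the tree.

// Hypothetical encode/decode round trip for the new type (not code from this
// commit). WRITE_CLASS_ENCODER(PastIntervals) in osd_types.h is what provides
// the free ::encode()/::decode() overloads used below.
#include "include/buffer.h"
#include "osd/osd_types.h"

static PastIntervals roundtrip(const PastIntervals &in)
{
  bufferlist bl;
  ::encode(in, bl);                     // writes the versioned encoding
  PastIntervals out;
  bufferlist::iterator it = bl.begin();
  ::decode(out, it);                    // consumes exactly what was written
  return out;
}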
View File

@ -120,6 +120,7 @@ TEST(hobject, prefixes5)
ASSERT_EQ(prefixes_out, prefixes_correct);
}
#if 0
TEST(pg_interval_t, check_new_interval)
{
//
@ -170,10 +171,10 @@ TEST(pg_interval_t, check_new_interval)
// being split
//
{
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_FALSE(pg_interval_t::check_new_interval(old_primary,
ASSERT_FALSE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -199,10 +200,10 @@ TEST(pg_interval_t, check_new_interval)
int _new_primary = osd_id + 1;
new_acting.push_back(_new_primary);
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -233,10 +234,10 @@ TEST(pg_interval_t, check_new_interval)
int _new_primary = osd_id + 1;
new_up.push_back(_new_primary);
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -265,10 +266,10 @@ TEST(pg_interval_t, check_new_interval)
vector<int> new_up;
int _new_up_primary = osd_id + 1;
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -304,10 +305,10 @@ TEST(pg_interval_t, check_new_interval)
inc.new_pools[pool_id].set_pg_num(new_pg_num);
osdmap->apply_incremental(inc);
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -343,10 +344,10 @@ TEST(pg_interval_t, check_new_interval)
inc.new_pools[pool_id].set_pg_num(pg_num);
osdmap->apply_incremental(inc);
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -375,12 +376,12 @@ TEST(pg_interval_t, check_new_interval)
{
vector<int> old_acting;
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ostringstream out;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -429,10 +430,10 @@ TEST(pg_interval_t, check_new_interval)
ostringstream out;
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -464,10 +465,10 @@ TEST(pg_interval_t, check_new_interval)
ostringstream out;
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -509,10 +510,10 @@ TEST(pg_interval_t, check_new_interval)
ostringstream out;
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -558,10 +559,10 @@ TEST(pg_interval_t, check_new_interval)
ostringstream out;
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
ASSERT_TRUE(past_intervals.empty());
ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
new_primary,
old_acting,
new_acting,
@ -582,6 +583,7 @@ TEST(pg_interval_t, check_new_interval)
ASSERT_NE(string::npos, out.str().find("does not include interval"));
}
}
#endif
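Reviewer note: with the detailed check_new_interval cases compiled out above, a minimal placeholder such as the following could keep the new type linked into this test binary. This is a suggestion rather than part of the commit, and it uses only members visible in the header hunk (empty(), size(), swap(), clear()), relying on the includes already present in this test file.

// Placeholder coverage for PastIntervals while the test above is disabled.
TEST(past_intervals, empty_swap_clear)
{
  PastIntervals a, b;
  ASSERT_TRUE(a.empty());
  ASSERT_EQ(0u, a.size());
  a.swap(b);     // swapping two empty instances leaves both empty
  ASSERT_TRUE(a.empty());
  ASSERT_TRUE(b.empty());
  a.clear();     // clear() on an empty instance is a no-op
  ASSERT_TRUE(a.empty());
}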
TEST(pg_t, get_ancestor)
{

View File

@ -284,7 +284,7 @@ struct metadata_section {
epoch_t map_epoch;
pg_info_t info;
pg_log_t log;
map<epoch_t,pg_interval_t> past_intervals;
PastIntervals past_intervals;
OSDMap osdmap;
bufferlist osdmap_bl; // Used in lieu of encoding osdmap due to crc checking
map<eversion_t, hobject_t> divergent_priors;
@ -295,7 +295,7 @@ struct metadata_section {
epoch_t map_epoch,
const pg_info_t &info,
const pg_log_t &log,
const map<epoch_t,pg_interval_t> &past_intervals,
const PastIntervals &past_intervals,
const pg_missing_t &missing)
: struct_ver(struct_ver),
map_epoch(map_epoch),

View File

@ -401,7 +401,7 @@ int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t
int r = PG::peek_map_epoch(fs, pgid, &map_epoch, &bl);
if (r < 0)
cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
map<epoch_t,pg_interval_t> past_intervals;
PastIntervals past_intervals;
__u8 struct_v;
r = PG::read_info(fs, pgid, coll, bl, info, past_intervals, struct_v);
if (r < 0) {
@ -442,7 +442,7 @@ int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid,
}
int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
map<epoch_t,pg_interval_t> &past_intervals)
PastIntervals &past_intervals)
{
//Empty for this
coll_t coll(info.pgid);
@ -464,7 +464,7 @@ int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
typedef map<eversion_t, hobject_t> divergent_priors_t;
int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
pg_log_t &log, map<epoch_t,pg_interval_t> &past_intervals,
pg_log_t &log, PastIntervals &past_intervals,
divergent_priors_t &divergent,
pg_missing_t &missing)
{
@ -747,7 +747,7 @@ int add_osdmap(ObjectStore *store, metadata_section &ms)
int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
const OSDSuperblock& superblock,
map<epoch_t,pg_interval_t> &past_intervals)
PastIntervals &past_intervals)
{
PGLog::IndexedLog log;
pg_missing_t missing;
@ -3412,7 +3412,7 @@ int main(int argc, char **argv)
cerr << "map_epoch " << map_epoch << std::endl;
pg_info_t info(pgid);
map<epoch_t,pg_interval_t> past_intervals;
PastIntervals past_intervals;
__u8 struct_ver;
ret = PG::read_info(fs, pgid, coll, bl, info, past_intervals,
struct_ver);

View File

@ -30,7 +30,7 @@ class ObjectStoreTool : public RadosDump
int do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
const OSDSuperblock& superblock,
map<epoch_t,pg_interval_t> &past_intervals);
PastIntervals &past_intervals);
int get_object(ObjectStore *store, coll_t coll,
bufferlist &bl, OSDMap &curmap, bool *skipped_objects,
ObjectStore::Sequencer &osr);

View File

@ -364,7 +364,7 @@ int update_pgmap_pg(ObjectStore& fs, MonitorDBStore& ms)
continue;
bufferlist bl;
pg_info_t info(pgid);
map<epoch_t, pg_interval_t> past_intervals;
PastIntervals past_intervals;
__u8 struct_v;
r = PG::read_info(&fs, pgid, coll, bl, info, past_intervals, struct_v);
if (r < 0) {