osd/PeeringState: refresh prior_readable_until_ub in pg_history_t on share

Before we share pg_history_t, refresh the prior_readable_until_ub to be
a simple duration from *now*, so that it is completely clock-independent.
The receiver can then interpret it relative to the time the message was
received; this loses a bit of precision but remains safe, since the value is an upper bound.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2019-07-19 16:52:17 -05:00
parent 3bb8a7210a
commit 6945cc52fd
3 changed files with 53 additions and 0 deletions

View File

@ -224,6 +224,8 @@ void PeeringState::check_recovery_sources(const OSDMapRef& osdmap)
void PeeringState::update_history(const pg_history_t& new_history)
{
auto mnow = pl->get_mnow();
info.history.refresh_prior_readable_until_ub(mnow, prior_readable_until_ub);
if (info.history.merge(new_history)) {
psdout(20) << __func__ << " advanced history from " << new_history << dendl;
dirty_info = true;
@ -232,6 +234,13 @@ void PeeringState::update_history(const pg_history_t& new_history)
past_intervals.clear();
dirty_big_info = true;
}
prior_readable_until_ub = info.history.get_prior_readable_until_ub(mnow);
if (prior_readable_until_ub != ceph::signedspan::zero()) {
dout(20) << __func__
<< " prior_readable_until_ub " << prior_readable_until_ub
<< " (mnow " << mnow << " + "
<< info.history.prior_readable_until_ub << ")" << dendl;
}
}
pl->on_info_history_change();
}
@ -2318,6 +2327,8 @@ void PeeringState::activate(
info.purged_snaps.swap(purged);
// start up replicas
info.history.refresh_prior_readable_until_ub(pl->get_mnow(),
prior_readable_until_ub);
ceph_assert(!acting_recovery_backfill.empty());
for (set<pg_shard_t>::iterator i = acting_recovery_backfill.begin();
@ -2549,6 +2560,9 @@ void PeeringState::share_pg_info()
{
psdout(10) << "share_pg_info" << dendl;
info.history.refresh_prior_readable_until_ub(pl->get_mnow(),
prior_readable_until_ub);
// share new pg_info_t with replicas
ceph_assert(!acting_recovery_backfill.empty());
for (set<pg_shard_t>::iterator i = acting_recovery_backfill.begin();
@ -2728,6 +2742,7 @@ void PeeringState::fulfill_query(const MQuery& query, PeeringCtxWrapper &rctx)
{
if (query.query.type == pg_query_t::INFO) {
pair<pg_shard_t, pg_info_t> notify_info;
// note this refreshes our prior_readable_until_ub value
update_history(query.query.history);
fulfill_info(query.from, query.query, notify_info);
rctx.send_notify(
@ -4272,6 +4287,9 @@ boost::statechart::result PeeringState::Reset::react(const ActMap&)
{
DECLARE_LOCALS;
if (ps->should_send_notify() && ps->get_primary().osd >= 0) {
ps->info.history.refresh_prior_readable_until_ub(
pl->get_mnow(),
ps->prior_readable_until_ub);
context< PeeringMachine >().send_notify(
ps->get_primary().osd,
pg_notify_t(
@ -5882,6 +5900,8 @@ boost::statechart::result PeeringState::ReplicaActive::react(const ActMap&)
{
DECLARE_LOCALS;
if (ps->should_send_notify() && ps->get_primary().osd >= 0) {
ps->info.history.refresh_prior_readable_until_ub(
pl->get_mnow(), ps->prior_readable_until_ub);
context< PeeringMachine >().send_notify(
ps->get_primary().osd,
pg_notify_t(
@ -6002,6 +6022,8 @@ boost::statechart::result PeeringState::Stray::react(const ActMap&)
{
DECLARE_LOCALS;
if (ps->should_send_notify() && ps->get_primary().osd >= 0) {
ps->info.history.refresh_prior_readable_until_ub(
pl->get_mnow(), ps->prior_readable_until_ub);
context< PeeringMachine >().send_notify(
ps->get_primary().osd,
pg_notify_t(
@ -6861,5 +6883,9 @@ ostream &operator<<(ostream &out, const PeeringState &ps) {
out << " " << pg_state_string(ps.get_state());
if (ps.should_send_notify())
out << " NOTIFY";
if (ps.prior_readable_until_ub != ceph::signedspan::zero()) {
out << " pruub " << ps.prior_readable_until_ub;
}
return out;
}

View File

@ -1358,6 +1358,8 @@ public:
/// upper bound on any acting OSDs' readable_until in this interval
ceph::signedspan readable_until_ub = ceph::signedspan::zero();
/// upper bound from prior interval(s)
ceph::signedspan prior_readable_until_ub = ceph::signedspan::zero();
/// [replica] upper bound we got from the primary (primary's clock)
ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero();
@ -1946,6 +1948,11 @@ public:
return readable_until;
}
/// Return the upper bound on prior intervals' readable_until, expressed on
/// this OSD's monotonic clock (zero means no outstanding bound).
ceph::signedspan get_prior_readable_until_ub() const {
return prior_readable_until_ub;
}
void renew_lease(ceph::signedspan now) {
bool was_min = (readable_until_ub == readable_until);
readable_until_ub_sent = now + readable_interval;
@ -1953,6 +1960,7 @@ public:
recalc_readable_until();
}
}
void send_lease();
void schedule_renew_lease();

View File

@ -2760,6 +2760,25 @@ struct pg_history_t {
void decode(ceph::buffer::list::const_iterator& p);
void dump(ceph::Formatter *f) const;
static void generate_test_instances(std::list<pg_history_t*>& o);
/// Re-express the prior-interval readable_until upper bound as a
/// clock-independent duration.
///
/// Both arguments are on the local OSD's monotonic clock (relative to osd
/// startup_time).  Stores (ub - now) in prior_readable_until_ub so a remote
/// peer can re-anchor it to its own receive time, or zero if the bound has
/// already passed.
///
/// @param now current monotonic time, relative to osd startup_time
/// @param ub  upper bound, relative to osd startup_time
/// @return the still-effective (local-clock) upper bound, or zero
ceph::signedspan refresh_prior_readable_until_ub(
  ceph::signedspan now,
  ceph::signedspan ub) {
  if (ub > now) {
    // bound is still in the future: keep it as a duration from *now*
    prior_readable_until_ub = ub - now;
    return ub;
  }
  // prior interval(s) are no longer readable; clear the bound
  prior_readable_until_ub = ceph::signedspan::zero();
  return ceph::signedspan::zero();
}
/// Re-anchor the stored prior-interval duration to a local time point.
///
/// prior_readable_until_ub is kept as a clock-independent duration (set by
/// refresh_prior_readable_until_ub); given @p now on the local monotonic
/// clock, return the corresponding absolute upper bound, or zero if no
/// bound from prior intervals is outstanding.
///
/// Declared const: this is a pure accessor and does not modify the struct
/// (the non-const original prevented use on const pg_history_t references).
ceph::signedspan get_prior_readable_until_ub(ceph::signedspan now) const {
  if (prior_readable_until_ub == ceph::signedspan::zero()) {
    return ceph::signedspan::zero();
  }
  return now + prior_readable_until_ub;
}
};
WRITE_CLASS_ENCODER(pg_history_t)