PG: explicitly delay ops on backfill_pos

Previously, we considered backfill_pos degraded in order to delay
ops since a write to backfill_pos could generate a snap before
backfill_pos, and we assume that (0, backfill_pos) is fully
backfilled.  This is a problem since it's possible that
backfill_pos is a valid object, but not one that currently exists.
For example, it might have been deleted since last_backfill was
last changed.  Instead, we will explicitly delay ops on
backfill_pos in waiting_for_backfill_pos.

This error resulted in #2691 since wait_for_degraded_object also
attempts to recover the object. At this point, the primary would
attempt to recover the object, find that it isn't there, and put
it in the missing set with need=0,0.  Eventually, recover_primary
attempts to recover that object, finds that it has been deleted
in the log, and asserts.

Signed-off-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
Samuel Just 2012-09-24 14:33:17 -07:00 committed by Sage Weil
parent a351f7a1f4
commit 3f952afe5d
3 changed files with 19 additions and 6 deletions

View File

@ -634,6 +634,10 @@ protected:
// pg waiters
bool flushed;
// Ops waiting on backfill_pos to change
list<OpRequestRef> waiting_for_backfill_pos;
list<OpRequestRef> waiting_for_map;
list<OpRequestRef> waiting_for_active;
list<OpRequestRef> waiting_for_all_missing;

View File

@ -140,12 +140,6 @@ bool ReplicatedPG::is_degraded_object(const hobject_t& soid)
peer_missing[peer].missing.count(soid))
return true;
// If soid == backfill_pos, we may implicitly write to
// the largest snap of soid for make_writeable.
if (peer == backfill_target &&
backfill_pos == soid)
return true;
// Object is degraded if it is after last_backfill AND
// we are backfilling it
if (peer == backfill_target &&
@ -187,6 +181,16 @@ void ReplicatedPG::wait_for_degraded_object(const hobject_t& soid, OpRequestRef
op->mark_delayed();
}
// Park an op that targets backfill_pos until backfill_pos changes.
//
// The op is appended to the tail of waiting_for_backfill_pos and nothing
// else happens here; no recovery of any object is initiated by this call.
//
// op: the request to hold back.
void ReplicatedPG::wait_for_backfill_pos(OpRequestRef op)
{
  // Inserting before end() places the op last, preserving arrival order.
  waiting_for_backfill_pos.insert(waiting_for_backfill_pos.end(), op);
}
void ReplicatedPG::release_waiting_for_backfill_pos()
{
requeue_ops(waiting_for_backfill_pos);
}
bool PGLSParentFilter::filter(bufferlist& xattr_data, bufferlist& outdata)
{
bufferlist::iterator iter = xattr_data.begin();
@ -1193,6 +1197,7 @@ void ReplicatedPG::do_scan(OpRequestRef op)
backfill_pos = backfill_info.begin > peer_backfill_info.begin ?
peer_backfill_info.begin : backfill_info.begin;
release_waiting_for_backfill_pos();
dout(10) << " backfill_pos now " << backfill_pos << dendl;
assert(waiting_on_backfill);
@ -5912,6 +5917,7 @@ void ReplicatedPG::on_change()
context_registry_on_change();
// requeue object waiters
requeue_ops(waiting_for_backfill_pos);
requeue_object_waiters(waiting_for_missing_object);
for (map<hobject_t,list<OpRequestRef> >::iterator p = waiting_for_degraded_object.begin();
p != waiting_for_degraded_object.end();
@ -6535,6 +6541,7 @@ int ReplicatedPG::recover_backfill(int max)
push_backfill_object(i->first, i->second.first, i->second.second, backfill_target);
}
release_waiting_for_backfill_pos();
dout(5) << "backfill_pos is " << backfill_pos << " and pinfo.last_backfill is "
<< pinfo.last_backfill << dendl;
for (set<hobject_t>::iterator i = backfills_in_flight.begin();

View File

@ -1039,6 +1039,8 @@ public:
bool is_missing_object(const hobject_t& oid);
void wait_for_missing_object(const hobject_t& oid, OpRequestRef op);
void wait_for_all_missing(OpRequestRef op);
// Queue op on waiting_for_backfill_pos.
void wait_for_backfill_pos(OpRequestRef op);
// Pass the ops queued on waiting_for_backfill_pos to requeue_ops().
void release_waiting_for_backfill_pos();
bool is_degraded_object(const hobject_t& oid);
void wait_for_degraded_object(const hobject_t& oid, OpRequestRef op);