From 3f952afe5da644b30015fead8e3d42a129b59989 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 24 Sep 2012 14:33:17 -0700 Subject: [PATCH] PG: explicitly delay ops on backfill_pos Previously, we considered backfill_pos degraded in order to delay ops since a write to backfill_pos could generate a snap before backfill_pos, and we assume that (0, backfill_pos) is fully backfilled. This is a problem since it's possible that backfill_pos is a valid object, but not one that currently exists. For example, it might have been deleted since last_backfill was last changed. Instead, we will explicitly delay ops on backfill_pos in waiting_for_backfill_pos. This error resulted in #2691 since wait_for_degraded_object also attempts to recover the object. At this point, the primary would attempt to recover the object, find that it isn't there, and put it in the missing set with need=0,0. Eventually, recover_primary attempts to recover that object, finds that it has been deleted in the log, and asserts. Signed-off-by: Samuel Just --- src/osd/PG.h | 4 ++++ src/osd/ReplicatedPG.cc | 19 +++++++++++++------ src/osd/ReplicatedPG.h | 2 ++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/osd/PG.h b/src/osd/PG.h index 469d293a0dd..5390a15f910 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -634,6 +634,10 @@ protected: // pg waiters bool flushed; + + // Ops waiting on backfill_pos to change + list<OpRequestRef> waiting_for_backfill_pos; + list<OpRequestRef> waiting_for_map; list<OpRequestRef> waiting_for_active; list<OpRequestRef> waiting_for_all_missing; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index c5f4e1b5fff..4df7872d9ea 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -140,12 +140,6 @@ bool ReplicatedPG::is_degraded_object(const hobject_t& soid) peer_missing[peer].missing.count(soid)) return true; - // If soid == backfill_pos, we may implicitly write to - // the largest snap of soid for make_writeable. 
- if (peer == backfill_target && - backfill_pos == soid) - return true; - // Object is degraded if after last_backfill AND // we have are backfilling it if (peer == backfill_target && @@ -187,6 +181,16 @@ void ReplicatedPG::wait_for_degraded_object(const hobject_t& soid, OpRequestRef op->mark_delayed(); } +void ReplicatedPG::wait_for_backfill_pos(OpRequestRef op) +{ + waiting_for_backfill_pos.push_back(op); +} + +void ReplicatedPG::release_waiting_for_backfill_pos() +{ + requeue_ops(waiting_for_backfill_pos); +} + bool PGLSParentFilter::filter(bufferlist& xattr_data, bufferlist& outdata) { bufferlist::iterator iter = xattr_data.begin(); @@ -1193,6 +1197,7 @@ void ReplicatedPG::do_scan(OpRequestRef op) backfill_pos = backfill_info.begin > peer_backfill_info.begin ? peer_backfill_info.begin : backfill_info.begin; + release_waiting_for_backfill_pos(); dout(10) << " backfill_pos now " << backfill_pos << dendl; assert(waiting_on_backfill); @@ -5912,6 +5917,7 @@ void ReplicatedPG::on_change() context_registry_on_change(); // requeue object waiters + requeue_ops(waiting_for_backfill_pos); requeue_object_waiters(waiting_for_missing_object); for (map<hobject_t, list<OpRequestRef> >::iterator p = waiting_for_degraded_object.begin(); p != waiting_for_degraded_object.end(); @@ -6535,6 +6541,7 @@ int ReplicatedPG::recover_backfill(int max) push_backfill_object(i->first, i->second.first, i->second.second, backfill_target); } + release_waiting_for_backfill_pos(); dout(5) << "backfill_pos is " << backfill_pos << " and pinfo.last_backfill is " << pinfo.last_backfill << dendl; for (set<hobject_t>::iterator i = backfills_in_flight.begin(); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 9c2836e7e09..d8c2d2408e4 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -1039,6 +1039,8 @@ public: bool is_missing_object(const hobject_t& oid); void wait_for_missing_object(const hobject_t& oid, OpRequestRef op); void wait_for_all_missing(OpRequestRef op); + void wait_for_backfill_pos(OpRequestRef 
op); + void release_waiting_for_backfill_pos(); bool is_degraded_object(const hobject_t& oid); void wait_for_degraded_object(const hobject_t& oid, OpRequestRef op);