Merge pull request #11255 from dzafman/wip-scrub-boundary

osd: fix scrub boundary to not include a SnapSet

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-10-10 09:12:16 -05:00 committed by GitHub
commit 3cc40d769b
5 changed files with 117 additions and 58 deletions

View File

@ -138,6 +138,14 @@ public:
return ret;
}
/// @return a copy of this object with snap forced to 0 — the boundary
/// form used by scrub so a chunk edge never lands on a head/snapdir
/// (i.e. snapset-bearing) object; max is returned unchanged
hobject_t get_object_boundary() const {
hobject_t boundary(*this);
if (!boundary.is_max())
boundary.snap = 0;
return boundary;
}
/// @return head version of this hobject_t
hobject_t get_head() const {
hobject_t ret(*this);
@ -162,14 +170,14 @@ public:
return snap == CEPH_NOSNAP;
}
/// @return true if object is neither head nor snapdir
/// @return true if object is neither head nor snapdir nor max
bool is_snap() const {
return (snap != CEPH_NOSNAP) && (snap != CEPH_SNAPDIR);
return !is_max() && !is_head() && !is_snapdir();
}
/// @return true iff the object should have a snapset in its attrs
bool has_snapset() const {
return !is_snap();
return is_head() || is_snapdir();
}
/* Do not use when a particular hash function is needed */

View File

@ -4024,6 +4024,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
case PG::Scrubber::INACTIVE:
dout(10) << "scrub start" << dendl;
scrubber.cleaned_meta_map.reset_bitwise(get_sort_bitwise());
publish_stats_to_osd();
scrubber.epoch_start = info.history.same_interval_since;
scrubber.active = true;
@ -4064,54 +4066,54 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
scrubber.received_maps.clear();
{
hobject_t candidate_end;
// get the start and end of our scrub chunk
//
// start and end need to lie on a hash boundary. We test for this by
// requesting a list and searching backward from the end looking for a
// boundary. If there's no boundary, we request a list after the first
// list, and so forth.
bool boundary_found = false;
/* get the start and end of our scrub chunk
*
* Our scrub chunk has an important restriction we're going to need to
* respect. We can't let head or snapdir be start or end.
* Using a half-open interval means that if end == head|snapdir,
* we'd scrub/lock head and the clone right next to head in different
* chunks which would allow us to miss clones created between
* scrubbing that chunk and scrubbing the chunk including head.
* This isn't true for any of the other clones since clones can
* only be created "just to the left of" head. There is one exception
* to this: promotion of clones which always happens to the left of the
* left-most clone, but promote_object checks the scrubber in that
* case, so it should be ok. Also, it's ok to "miss" clones at the
* left end of the range if we are a tier because they may legitimately
* not exist (see _scrub).
*/
unsigned min = MAX(3, cct->_conf->osd_scrub_chunk_min);
hobject_t start = scrubber.start;
unsigned loop = 0;
while (!boundary_found) {
vector<hobject_t> objects;
ret = get_pgbackend()->objects_list_partial(
start,
cct->_conf->osd_scrub_chunk_min,
cct->_conf->osd_scrub_chunk_max,
&objects,
&candidate_end);
assert(ret >= 0);
hobject_t candidate_end;
vector<hobject_t> objects;
ret = get_pgbackend()->objects_list_partial(
start,
min,
MAX(min, cct->_conf->osd_scrub_chunk_max),
&objects,
&candidate_end);
assert(ret >= 0);
// in case we don't find a boundary: start again at the end
start = candidate_end;
// special case: reached end of file store, implicitly a boundary
if (objects.empty()) {
break;
}
// search backward from the end looking for a boundary
objects.push_back(candidate_end);
while (!boundary_found && objects.size() > 1) {
hobject_t end = objects.back().get_boundary();
objects.pop_back();
if (objects.back().get_hash() != end.get_hash()) {
candidate_end = end;
boundary_found = true;
}
}
// reset handle once in a while, the search maybe takes long.
if (++loop >= g_conf->osd_loop_before_reset_tphandle) {
handle.reset_tp_timeout();
loop = 0;
}
}
if (!objects.empty()) {
hobject_t back = objects.back();
while (candidate_end.has_snapset() &&
candidate_end.get_head() == back.get_head()) {
candidate_end = back;
objects.pop_back();
if (objects.empty()) {
assert(0 ==
"Somehow we got more than 2 objects which"
"have the same head but are not clones");
}
back = objects.back();
}
if (candidate_end.has_snapset()) {
assert(candidate_end.get_head() != back.get_head());
candidate_end = candidate_end.get_object_boundary();
}
} else {
assert(candidate_end.is_max());
}
if (!_range_available_for_scrub(scrubber.start, candidate_end)) {
// we'll be requeued by whatever made us unavailable for scrub
@ -4136,7 +4138,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
}
}
// ask replicas to wait until last_update_applied >= scrubber.subset_last_update and then scan
// ask replicas to wait until
// last_update_applied >= scrubber.subset_last_update and then scan
scrubber.waiting_on_whom.insert(pg_whoami);
++scrubber.waiting_on;
@ -4283,7 +4286,7 @@ void PG::scrub_compare_maps()
dout(10) << __func__ << " has maps, analyzing" << dendl;
// construct authoritative scrub map for type specific scrubbing
ScrubMap authmap(scrubber.primary_scrubmap);
scrubber.cleaned_meta_map.insert(scrubber.primary_scrubmap);
map<hobject_t, pair<uint32_t, uint32_t>, hobject_t::BitwiseComparator> missing_digest;
if (acting.size() > 1) {
@ -4345,13 +4348,34 @@ void PG::scrub_compare_maps()
for (map<hobject_t, list<pg_shard_t>, hobject_t::BitwiseComparator>::iterator i = authoritative.begin();
i != authoritative.end();
++i) {
authmap.objects.erase(i->first);
authmap.objects.insert(*(maps[i->second.back()]->objects.find(i->first)));
scrubber.cleaned_meta_map.objects.erase(i->first);
scrubber.cleaned_meta_map.objects.insert(
*(maps[i->second.back()]->objects.find(i->first))
);
}
}
ScrubMap for_meta_scrub(get_sort_bitwise());
if (scrubber.end.is_max() ||
scrubber.cleaned_meta_map.objects.empty()) {
scrubber.cleaned_meta_map.swap(for_meta_scrub);
} else {
auto iter = scrubber.cleaned_meta_map.objects.end();
--iter; // not empty, see if clause
auto begin = scrubber.cleaned_meta_map.objects.begin();
while (iter != begin) {
auto next = iter--;
if (next->first.get_head() != iter->first.get_head()) {
++iter;
break;
}
}
for_meta_scrub.objects.insert(begin, iter);
scrubber.cleaned_meta_map.objects.erase(begin, iter);
}
// ok, do the pg-type specific scrubbing
_scrub(authmap, missing_digest);
_scrub(for_meta_scrub, missing_digest);
if (!scrubber.store->empty()) {
if (state_test(PG_STATE_REPAIR)) {
dout(10) << __func__ << ": discarding scrub results" << dendl;

View File

@ -1166,6 +1166,9 @@ public:
// Map from object with errors to good peers
map<hobject_t, list<pair<ScrubMap::object, pg_shard_t> >, hobject_t::BitwiseComparator> authoritative;
// Cleaned map pending snap metadata scrub
ScrubMap cleaned_meta_map;
// digest updates which we are waiting on
int num_digest_updates_pending;
@ -1264,6 +1267,7 @@ public:
missing.clear();
authoritative.clear();
num_digest_updates_pending = 0;
cleaned_meta_map = ScrubMap();
}
void create_results(const hobject_t& obj);

View File

@ -5099,9 +5099,9 @@ void ScrubMap::decode(bufferlist::iterator& bl, int64_t pool)
// handle hobject_t upgrade
if (struct_v < 3) {
map<hobject_t, object, hobject_t::BitwiseComparator> tmp;
map<hobject_t, object, hobject_t::ComparatorWithDefault> tmp;
tmp.swap(objects);
for (map<hobject_t, object, hobject_t::BitwiseComparator>::iterator i = tmp.begin();
for (map<hobject_t, object, hobject_t::ComparatorWithDefault>::iterator i = tmp.begin();
i != tmp.end();
++i) {
hobject_t first(i->first);
@ -5117,7 +5117,7 @@ void ScrubMap::dump(Formatter *f) const
f->dump_stream("valid_through") << valid_through;
f->dump_stream("incremental_since") << incr_since;
f->open_array_section("objects");
for (map<hobject_t,object, hobject_t::BitwiseComparator>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
for (map<hobject_t,object, hobject_t::ComparatorWithDefault>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
f->open_object_section("object");
f->dump_string("name", p->first.oid.name);
f->dump_unsigned("hash", p->first.get_hash());

View File

@ -4397,15 +4397,38 @@ struct ScrubMap {
};
WRITE_CLASS_ENCODER(object)
map<hobject_t,object, hobject_t::BitwiseComparator> objects;
bool bitwise; // ephemeral, not encoded
map<hobject_t,object, hobject_t::ComparatorWithDefault> objects;
eversion_t valid_through;
eversion_t incr_since;
ScrubMap() : bitwise(true) {}
ScrubMap(bool bitwise)
: bitwise(bitwise), objects(hobject_t::ComparatorWithDefault(bitwise)) {}
void merge_incr(const ScrubMap &l);
// Merge r's entries into this map.  std::map::insert with an iterator
// range does not overwrite: on a key collision our existing entry wins.
void insert(const ScrubMap &r) {
objects.insert(r.objects.begin(), r.objects.end());
}
// Exchange all state with r.
// Fix: also swap the ephemeral 'bitwise' flag.  std::map::swap
// exchanges the comparators along with the elements, so after swapping
// 'objects' the flag must follow its map — otherwise reset_bitwise()
// (which keys off 'bitwise') would see an inconsistent flag/comparator
// pair when the two maps were sorted differently.
void swap(ScrubMap &r) {
::swap(objects, r.objects);
::swap(valid_through, r.valid_through);
::swap(incr_since, r.incr_since);
// manual bool swap: no project ::swap overload is guaranteed for bool
bool tmp_bitwise = bitwise;
bitwise = r.bitwise;
r.bitwise = tmp_bitwise;
}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl, int64_t pool=-1);
void dump(Formatter *f) const;
// Re-sort 'objects' under the requested order (bitwise vs. not).
// No-op when the order already matches; otherwise rebuilds the map via
// the range constructor with a comparator in the new order (O(n log n))
// and swaps it in — std::map::swap carries the new comparator across —
// then records the new order in the 'bitwise' flag.
void reset_bitwise(bool new_bitwise) {
if (bitwise == new_bitwise)
return;
map<hobject_t, object, hobject_t::ComparatorWithDefault> new_objects(
objects.begin(),
objects.end(),
hobject_t::ComparatorWithDefault(new_bitwise));
::swap(new_objects, objects);
bitwise = new_bitwise;  // keep flag consistent with the map's comparator
}
static void generate_test_instances(list<ScrubMap*>& o);
};
WRITE_CLASS_ENCODER(ScrubMap::object)