Merge pull request #11255 from dzafman/wip-scrub-boundary

osd: fix scrub boundary to not include a SnapSet

Reviewed-by: Sage Weil <sage@redhat.com>

commit 3cc40d769b
src/common/hobject.h

@@ -138,6 +138,14 @@ public:
     return ret;
   }
 
+  hobject_t get_object_boundary() const {
+    if (is_max())
+      return *this;
+    hobject_t ret = *this;
+    ret.snap = 0;
+    return ret;
+  }
+
   /// @return head version of this hobject_t
   hobject_t get_head() const {
     hobject_t ret(*this);
@@ -162,14 +170,14 @@ public:
     return snap == CEPH_NOSNAP;
   }
 
-  /// @return true if object is neither head nor snapdir
+  /// @return true if object is neither head nor snapdir nor max
   bool is_snap() const {
-    return (snap != CEPH_NOSNAP) && (snap != CEPH_SNAPDIR);
+    return !is_max() && !is_head() && !is_snapdir();
  }
 
   /// @return true iff the object should have a snapset in it's attrs
   bool has_snapset() const {
-    return !is_snap();
+    return is_head() || is_snapdir();
   }
 
   /* Do not use when a particular hash function is needed */
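For orientation, here is a minimal self-contained sketch of the predicate semantics this change establishes. The struct, field set, and constant values below are simplified stand-ins for the real hobject_t in src/common/hobject.h (which also carries pool, hash, key, and namespace fields), not the actual type:

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the reserved snap ids; assumed to mirror the
// Ceph constants, where head and snapdir sit at the top of the id range.
static const uint64_t NOSNAP  = static_cast<uint64_t>(-2); // head object
static const uint64_t SNAPDIR = static_cast<uint64_t>(-3); // snapdir object

struct hobject_sketch {
  uint64_t snap;
  bool max; // sentinel "max" object

  bool is_max() const     { return max; }
  bool is_head() const    { return snap == NOSNAP; }
  bool is_snapdir() const { return snap == SNAPDIR; }

  // After this patch, a clone is anything that is not head, snapdir, or max.
  bool is_snap() const { return !is_max() && !is_head() && !is_snapdir(); }

  // Only head and snapdir carry a SnapSet in their attrs.
  bool has_snapset() const { return is_head() || is_snapdir(); }

  // Smallest object sorting together with this one: snap ids sort ascending
  // within an object, so snap == 0 precedes every clone, snapdir, and head.
  hobject_sketch get_object_boundary() const {
    if (is_max())
      return *this;
    hobject_sketch ret = *this;
    ret.snap = 0;
    return ret;
  }
};

int main() {
  hobject_sketch head{NOSNAP, false};
  hobject_sketch snapdir{SNAPDIR, false};
  hobject_sketch clone{4, false};
  assert(head.has_snapset() && snapdir.has_snapset() && !clone.has_snapset());
  assert(clone.is_snap() && !head.is_snap() && !snapdir.is_snap());
  assert(head.get_object_boundary().snap == 0);
  return 0;
}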
src/osd/PG.cc
@@ -4024,6 +4024,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
     case PG::Scrubber::INACTIVE:
       dout(10) << "scrub start" << dendl;
 
+      scrubber.cleaned_meta_map.reset_bitwise(get_sort_bitwise());
+
       publish_stats_to_osd();
       scrubber.epoch_start = info.history.same_interval_since;
       scrubber.active = true;
@@ -4064,54 +4066,54 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
       scrubber.received_maps.clear();
 
       {
-        hobject_t candidate_end;
-
-        // get the start and end of our scrub chunk
-        //
-        // start and end need to lie on a hash boundary. We test for this by
-        // requesting a list and searching backward from the end looking for a
-        // boundary. If there's no boundary, we request a list after the first
-        // list, and so forth.
-
-        bool boundary_found = false;
+        /* get the start and end of our scrub chunk
+         *
+         * Our scrub chunk has an important restriction we're going to need to
+         * respect. We can't let head or snapdir be start or end.
+         * Using a half-open interval means that if end == head|snapdir,
+         * we'd scrub/lock head and the clone right next to head in different
+         * chunks which would allow us to miss clones created between
+         * scrubbing that chunk and scrubbing the chunk including head.
+         * This isn't true for any of the other clones since clones can
+         * only be created "just to the left of" head. There is one exception
+         * to this: promotion of clones which always happens to the left of the
+         * left-most clone, but promote_object checks the scrubber in that
+         * case, so it should be ok. Also, it's ok to "miss" clones at the
+         * left end of the range if we are a tier because they may legitimately
+         * not exist (see _scrub).
+         */
+        unsigned min = MAX(3, cct->_conf->osd_scrub_chunk_min);
         hobject_t start = scrubber.start;
-        unsigned loop = 0;
-        while (!boundary_found) {
-          vector<hobject_t> objects;
-          ret = get_pgbackend()->objects_list_partial(
-            start,
-            cct->_conf->osd_scrub_chunk_min,
-            cct->_conf->osd_scrub_chunk_max,
-            &objects,
-            &candidate_end);
-          assert(ret >= 0);
+        hobject_t candidate_end;
+        vector<hobject_t> objects;
+        ret = get_pgbackend()->objects_list_partial(
+          start,
+          min,
+          MAX(min, cct->_conf->osd_scrub_chunk_max),
+          &objects,
+          &candidate_end);
+        assert(ret >= 0);
 
-          // in case we don't find a boundary: start again at the end
-          start = candidate_end;
-
-          // special case: reached end of file store, implicitly a boundary
-          if (objects.empty()) {
-            break;
-          }
-
-          // search backward from the end looking for a boundary
-          objects.push_back(candidate_end);
-          while (!boundary_found && objects.size() > 1) {
-            hobject_t end = objects.back().get_boundary();
-            objects.pop_back();
-
-            if (objects.back().get_hash() != end.get_hash()) {
-              candidate_end = end;
-              boundary_found = true;
-            }
-          }
-
-          // reset handle once in a while, the search maybe takes long.
-          if (++loop >= g_conf->osd_loop_before_reset_tphandle) {
-            handle.reset_tp_timeout();
-            loop = 0;
-          }
-        }
+        if (!objects.empty()) {
+          hobject_t back = objects.back();
+          while (candidate_end.has_snapset() &&
+                 candidate_end.get_head() == back.get_head()) {
+            candidate_end = back;
+            objects.pop_back();
+            if (objects.empty()) {
+              assert(0 ==
+                     "Somehow we got more than 2 objects which"
+                     "have the same head but are not clones");
+            }
+            back = objects.back();
+          }
+          if (candidate_end.has_snapset()) {
+            assert(candidate_end.get_head() != back.get_head());
+            candidate_end = candidate_end.get_object_boundary();
+          }
+        } else {
+          assert(candidate_end.is_max());
+        }
 
         if (!_range_available_for_scrub(scrubber.start, candidate_end)) {
           // we'll be requeued by whatever made us unavailable for scrub
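To make the new boundary walk concrete, a toy model follows. Obj, chunk_end, and the string encoding of the result are invented for illustration, not PG code; the point is the two outcomes the loop above can produce: the tentative end walked back onto a clone of its own head, or snapped to an object boundary when no such clone is listed.

#include <cassert>
#include <string>
#include <vector>

// Toy stand-in for an hobject_t: 'name' plays the role of get_head() and
// 'head' the role of has_snapset() (head or snapdir vs. clone).
struct Obj {
  std::string name;
  bool head;
};

// Mirrors the loop in PG::chunky_scrub above: walk the tentative chunk end
// backwards while it is a head/snapdir sharing its head with the last
// listed object, then snap a remaining head/snapdir to its object boundary.
std::string chunk_end(std::vector<Obj> objects, Obj candidate_end) {
  if (objects.empty())
    return "<max>";
  Obj back = objects.back();
  while (candidate_end.head && candidate_end.name == back.name) {
    candidate_end = back;
    objects.pop_back();
    assert(!objects.empty() &&
           "more than two objects sharing a head that are not clones");
    back = objects.back();
  }
  if (candidate_end.head) {
    assert(candidate_end.name != back.name);
    return candidate_end.name + ":boundary"; // before all of this object
  }
  return candidate_end.name + ":clone";      // a clone may end a chunk
}

int main() {
  // Listing ends with a clone of "b" and the tentative end is b's head:
  // the end is walked back onto the clone, so head and its adjacent clone
  // land together in the next chunk.
  assert(chunk_end({{"a", false}, {"b", false}}, {"b", true}) == "b:clone");

  // No clone of "b" was listed: the end snaps to b's object boundary.
  assert(chunk_end({{"a", false}}, {"b", true}) == "b:boundary");
  return 0;
}

Either outcome preserves the invariant the comment block argues for: head and the clone right next to it are never split across chunks, so a clone created between the two scrub passes cannot be missed.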
@@ -4136,7 +4138,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
           }
         }
 
-      // ask replicas to wait until last_update_applied >= scrubber.subset_last_update and then scan
+      // ask replicas to wait until
+      // last_update_applied >= scrubber.subset_last_update and then scan
       scrubber.waiting_on_whom.insert(pg_whoami);
       ++scrubber.waiting_on;
 
@@ -4283,7 +4286,7 @@ void PG::scrub_compare_maps()
   dout(10) << __func__ << " has maps, analyzing" << dendl;
 
   // construct authoritative scrub map for type specific scrubbing
-  ScrubMap authmap(scrubber.primary_scrubmap);
+  scrubber.cleaned_meta_map.insert(scrubber.primary_scrubmap);
   map<hobject_t, pair<uint32_t, uint32_t>, hobject_t::BitwiseComparator> missing_digest;
 
   if (acting.size() > 1) {
@@ -4345,13 +4348,34 @@ void PG::scrub_compare_maps()
     for (map<hobject_t, list<pg_shard_t>, hobject_t::BitwiseComparator>::iterator i = authoritative.begin();
          i != authoritative.end();
          ++i) {
-      authmap.objects.erase(i->first);
-      authmap.objects.insert(*(maps[i->second.back()]->objects.find(i->first)));
+      scrubber.cleaned_meta_map.objects.erase(i->first);
+      scrubber.cleaned_meta_map.objects.insert(
+        *(maps[i->second.back()]->objects.find(i->first))
+        );
     }
   }
 
+  ScrubMap for_meta_scrub(get_sort_bitwise());
+  if (scrubber.end.is_max() ||
+      scrubber.cleaned_meta_map.objects.empty()) {
+    scrubber.cleaned_meta_map.swap(for_meta_scrub);
+  } else {
+    auto iter = scrubber.cleaned_meta_map.objects.end();
+    --iter; // not empty, see if clause
+    auto begin = scrubber.cleaned_meta_map.objects.begin();
+    while (iter != begin) {
+      auto next = iter--;
+      if (next->first.get_head() != iter->first.get_head()) {
+        ++iter;
+        break;
+      }
+    }
+    for_meta_scrub.objects.insert(begin, iter);
+    scrubber.cleaned_meta_map.objects.erase(begin, iter);
+  }
+
   // ok, do the pg-type specific scrubbing
-  _scrub(authmap, missing_digest);
+  _scrub(for_meta_scrub, missing_digest);
   if (!scrubber.store->empty()) {
     if (state_test(PG_STATE_REPAIR)) {
       dout(10) << __func__ << ": discarding scrub results" << dendl;
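The hand-off from cleaned_meta_map to for_meta_scrub is the subtle part of this hunk: until the chunk end reaches max, the trailing head group may still be incomplete (later clones of the same head arrive with the next chunk), so it is carried forward and only complete groups are released for meta scrub. A small self-contained sketch of the same split, using std::map<std::string, int> and a made-up head_of() in place of hobject_t::get_head():

#include <cassert>
#include <map>
#include <string>

// Keys sort so that all entries of one head group are adjacent; the part
// of the key before ':' stands in for get_head().
static std::string head_of(const std::string& k) {
  return k.substr(0, k.find(':'));
}

int main() {
  std::map<std::string, int> cleaned = {
    {"a:clone1", 1}, {"a:head", 2}, {"b:clone7", 3}  // "b" group incomplete
  };

  std::map<std::string, int> for_meta_scrub;
  auto iter = cleaned.end();
  --iter;                      // not empty by construction
  auto begin = cleaned.begin();
  while (iter != begin) {      // walk back to the last head boundary
    auto next = iter--;
    if (head_of(next->first) != head_of(iter->first)) {
      ++iter;
      break;
    }
  }
  for_meta_scrub.insert(begin, iter);  // release complete groups
  cleaned.erase(begin, iter);          // hold back the trailing group

  assert(for_meta_scrub.size() == 2);      // complete "a" group released
  assert(cleaned.count("b:clone7") == 1);  // "b" group carried forward
  return 0;
}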
src/osd/PG.h

@@ -1166,6 +1166,9 @@ public:
     // Map from object with errors to good peers
     map<hobject_t, list<pair<ScrubMap::object, pg_shard_t> >, hobject_t::BitwiseComparator> authoritative;
 
+    // Cleaned map pending snap metadata scrub
+    ScrubMap cleaned_meta_map;
+
     // digest updates which we are waiting on
     int num_digest_updates_pending;
 
@@ -1264,6 +1267,7 @@ public:
       missing.clear();
       authoritative.clear();
       num_digest_updates_pending = 0;
+      cleaned_meta_map = ScrubMap();
     }
 
     void create_results(const hobject_t& obj);
src/osd/osd_types.cc

@@ -5099,9 +5099,9 @@ void ScrubMap::decode(bufferlist::iterator& bl, int64_t pool)
 
   // handle hobject_t upgrade
   if (struct_v < 3) {
-    map<hobject_t, object, hobject_t::BitwiseComparator> tmp;
+    map<hobject_t, object, hobject_t::ComparatorWithDefault> tmp;
     tmp.swap(objects);
-    for (map<hobject_t, object, hobject_t::BitwiseComparator>::iterator i = tmp.begin();
+    for (map<hobject_t, object, hobject_t::ComparatorWithDefault>::iterator i = tmp.begin();
          i != tmp.end();
          ++i) {
       hobject_t first(i->first);
@@ -5117,7 +5117,7 @@ void ScrubMap::dump(Formatter *f) const
   f->dump_stream("valid_through") << valid_through;
   f->dump_stream("incremental_since") << incr_since;
   f->open_array_section("objects");
-  for (map<hobject_t,object, hobject_t::BitwiseComparator>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
+  for (map<hobject_t,object, hobject_t::ComparatorWithDefault>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
     f->open_object_section("object");
     f->dump_string("name", p->first.oid.name);
     f->dump_unsigned("hash", p->first.get_hash());
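The comparator switch in these two hunks is mechanical, but the type it switches to is worth a note: a plausible reading of the name is that, unlike a purely stateless BitwiseComparator, a comparator carrying a runtime sort-mode flag must remain default-constructible for code paths like decode that build a map before any sort mode is known. A generic sketch of that pattern; the names here are illustrative, not the Ceph definitions:

#include <cassert>
#include <map>
#include <string>

// A stateful std::map comparator that is still default-constructible:
// the default constructor picks one well-defined ordering.
struct CmpWithDefault {
  bool ascending;
  CmpWithDefault() : ascending(true) {}           // the "default"
  explicit CmpWithDefault(bool asc) : ascending(asc) {}
  bool operator()(const std::string& a, const std::string& b) const {
    return ascending ? a < b : b < a;
  }
};

int main() {
  std::map<std::string, int, CmpWithDefault> by_default;  // ascending
  std::map<std::string, int, CmpWithDefault> reversed{CmpWithDefault(false)};
  by_default = {{"a", 1}, {"b", 2}};
  reversed   = {{"a", 1}, {"b", 2}};
  assert(by_default.begin()->first == "a");
  assert(reversed.begin()->first == "b");
  return 0;
}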
src/osd/osd_types.h

@@ -4397,15 +4397,38 @@ struct ScrubMap {
   };
   WRITE_CLASS_ENCODER(object)
 
-  map<hobject_t,object, hobject_t::BitwiseComparator> objects;
+  bool bitwise; // ephemeral, not encoded
+  map<hobject_t,object, hobject_t::ComparatorWithDefault> objects;
   eversion_t valid_through;
   eversion_t incr_since;
 
+  ScrubMap() : bitwise(true) {}
+  ScrubMap(bool bitwise)
+    : bitwise(bitwise), objects(hobject_t::ComparatorWithDefault(bitwise)) {}
+
   void merge_incr(const ScrubMap &l);
+  void insert(const ScrubMap &r) {
+    objects.insert(r.objects.begin(), r.objects.end());
+  }
+  void swap(ScrubMap &r) {
+    ::swap(objects, r.objects);
+    ::swap(valid_through, r.valid_through);
+    ::swap(incr_since, r.incr_since);
+  }
+
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bl, int64_t pool=-1);
   void dump(Formatter *f) const;
+  void reset_bitwise(bool new_bitwise) {
+    if (bitwise == new_bitwise)
+      return;
+    map<hobject_t, object, hobject_t::ComparatorWithDefault> new_objects(
+      objects.begin(),
+      objects.end(),
+      hobject_t::ComparatorWithDefault(new_bitwise));
+    ::swap(new_objects, objects);
+    bitwise = new_bitwise;
+  }
   static void generate_test_instances(list<ScrubMap*>& o);
 };
 WRITE_CLASS_ENCODER(ScrubMap::object)
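Finally, reset_bitwise() above uses the standard idiom for changing a std::map ordering at runtime: comparator state cannot be mutated in place, so the entries are rebuilt into a map constructed with the new comparator and swapped in. A self-contained sketch of the same idiom; CmpWithDefault repeats the illustrative comparator from the previous example so this compiles on its own:

#include <cassert>
#include <map>
#include <string>
#include <utility>

struct CmpWithDefault {
  bool ascending;
  CmpWithDefault(bool asc = true) : ascending(asc) {}
  bool operator()(const std::string& a, const std::string& b) const {
    return ascending ? a < b : b < a;
  }
};

using Map = std::map<std::string, int, CmpWithDefault>;

// Rebuild-and-swap, mirroring reset_bitwise(): no-op when the order already
// matches, otherwise re-insert every entry under the new comparator.
void reset_order(Map& m, bool& current, bool wanted) {
  if (current == wanted)
    return;
  Map reordered(m.begin(), m.end(), CmpWithDefault(wanted));
  std::swap(reordered, m);  // the old map is destroyed with 'reordered'
  current = wanted;
}

int main() {
  bool ascending = true;
  Map m({{"a", 1}, {"b", 2}}, CmpWithDefault(true));
  reset_order(m, ascending, false);
  assert(m.begin()->first == "b");  // now sorted descending
  return 0;
}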