From 26745ca899b56643481eda6fae65a72941d668e2 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Sat, 8 Feb 2014 13:52:28 -0800 Subject: [PATCH] PGBackend/ReplicatedBackend: move the backend agnostic code into PGBackend for scrub Signed-off-by: Samuel Just --- src/osd/PGBackend.cc | 264 +++++++++++++++++++++++++++++++++++ src/osd/PGBackend.h | 21 +-- src/osd/ReplicatedBackend.cc | 263 ---------------------------------- src/osd/ReplicatedBackend.h | 20 --- 4 files changed, 277 insertions(+), 291 deletions(-) diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 99e191c45c1..80fca2a5cdc 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -13,6 +13,7 @@ */ +#include "common/errno.h" #include "PGBackend.h" #include "OSD.h" @@ -251,3 +252,266 @@ void PGBackend::trim_stashed_object( t->remove( coll, ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard)); } + +/* + * pg lock may or may not be held + */ +void PGBackend::be_scan_list( + ScrubMap &map, const vector &ls, bool deep, + ThreadPool::TPHandle &handle) +{ + dout(10) << "_scan_list scanning " << ls.size() << " objects" + << (deep ? " deeply" : "") << dendl; + int i = 0; + for (vector::const_iterator p = ls.begin(); + p != ls.end(); + ++p, i++) { + handle.reset_tp_timeout(); + hobject_t poid = *p; + + struct stat st; + int r = store->stat( + coll, + ghobject_t( + poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), + &st, + true); + if (r == 0) { + ScrubMap::object &o = map.objects[poid]; + o.size = st.st_size; + assert(!o.negative); + store->getattrs( + coll, + ghobject_t( + poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), + o.attrs); + + // calculate the CRC32 on deep scrubs + if (deep) { + be_deep_scrub(*p, o, handle); + } + + dout(25) << "_scan_list " << poid << dendl; + } else if (r == -ENOENT) { + dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl; + } else if (r == -EIO) { + dout(25) << "_scan_list " << poid << " got " << r << ", read_error" << dendl; + ScrubMap::object &o = map.objects[poid]; + o.read_error = true; + } else { + derr << "_scan_list got: " << cpp_strerror(r) << dendl; + assert(0); + } + } +} + +enum scrub_error_type PGBackend::be_compare_scrub_objects( + const ScrubMap::object &auth, + const ScrubMap::object &candidate, + ostream &errorstream) +{ + enum scrub_error_type error = CLEAN; + if (candidate.read_error) { + // This can occur on stat() of a shallow scrub, but in that case size will + // be invalid, and this will be over-ridden below. + error = DEEP_ERROR; + errorstream << "candidate had a read error"; + } + if (auth.digest_present && candidate.digest_present) { + if (auth.digest != candidate.digest) { + if (error != CLEAN) + errorstream << ", "; + error = DEEP_ERROR; + + errorstream << "digest " << candidate.digest + << " != known digest " << auth.digest; + } + } + if (auth.omap_digest_present && candidate.omap_digest_present) { + if (auth.omap_digest != candidate.omap_digest) { + if (error != CLEAN) + errorstream << ", "; + error = DEEP_ERROR; + + errorstream << "omap_digest " << candidate.omap_digest + << " != known omap_digest " << auth.omap_digest; + } + } + // Shallow error takes precendence because this will be seen by + // both types of scrubs. + if (auth.size != candidate.size) { + if (error != CLEAN) + errorstream << ", "; + error = SHALLOW_ERROR; + errorstream << "size " << candidate.size + << " != known size " << auth.size; + } + for (map::const_iterator i = auth.attrs.begin(); + i != auth.attrs.end(); + ++i) { + if (!candidate.attrs.count(i->first)) { + if (error != CLEAN) + errorstream << ", "; + error = SHALLOW_ERROR; + errorstream << "missing attr " << i->first; + } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) { + if (error != CLEAN) + errorstream << ", "; + error = SHALLOW_ERROR; + errorstream << "attr value mismatch " << i->first; + } + } + for (map::const_iterator i = candidate.attrs.begin(); + i != candidate.attrs.end(); + ++i) { + if (!auth.attrs.count(i->first)) { + if (error != CLEAN) + errorstream << ", "; + error = SHALLOW_ERROR; + errorstream << "extra attr " << i->first; + } + } + return error; +} + +map::const_iterator + PGBackend::be_select_auth_object( + const hobject_t &obj, + const map &maps) +{ + map::const_iterator auth = maps.end(); + for (map::const_iterator j = maps.begin(); + j != maps.end(); + ++j) { + map::iterator i = + j->second->objects.find(obj); + if (i == j->second->objects.end()) { + continue; + } + if (auth == maps.end()) { + // Something is better than nothing + // TODO: something is NOT better than nothing, do something like + // unfound_lost if no valid copies can be found, or just mark unfound + auth = j; + dout(10) << __func__ << ": selecting osd " << j->first + << " for obj " << obj + << ", auth == maps.end()" + << dendl; + continue; + } + if (i->second.read_error) { + // scrub encountered read error, probably corrupt + dout(10) << __func__ << ": rejecting osd " << j->first + << " for obj " << obj + << ", read_error" + << dendl; + continue; + } + map::iterator k = i->second.attrs.find(OI_ATTR); + if (k == i->second.attrs.end()) { + // no object info on object, probably corrupt + dout(10) << __func__ << ": rejecting osd " << j->first + << " for obj " << obj + << ", no oi attr" + << dendl; + continue; + } + bufferlist bl; + bl.push_back(k->second); + object_info_t oi; + try { + bufferlist::iterator bliter = bl.begin(); + ::decode(oi, bliter); + } catch (...) { + dout(10) << __func__ << ": rejecting osd " << j->first + << " for obj " << obj + << ", corrupt oi attr" + << dendl; + // invalid object info, probably corrupt + continue; + } + if (oi.size != i->second.size) { + // invalid size, probably corrupt + dout(10) << __func__ << ": rejecting osd " << j->first + << " for obj " << obj + << ", size mismatch" + << dendl; + // invalid object info, probably corrupt + continue; + } + dout(10) << __func__ << ": selecting osd " << j->first + << " for obj " << obj + << dendl; + auth = j; + } + return auth; +} + +void PGBackend::be_compare_scrubmaps( + const map &maps, + map > &missing, + map > &inconsistent, + map &authoritative, + map > &invalid_snapcolls, + int &shallow_errors, int &deep_errors, + const spg_t pgid, + const vector &acting, + ostream &errorstream) +{ + map::const_iterator i; + map::const_iterator j; + set master_set; + + // Construct master set + for (j = maps.begin(); j != maps.end(); ++j) { + for (i = j->second->objects.begin(); i != j->second->objects.end(); ++i) { + master_set.insert(i->first); + } + } + + // Check maps against master set and each other + for (set::const_iterator k = master_set.begin(); + k != master_set.end(); + ++k) { + map::const_iterator auth = + be_select_auth_object(*k, maps); + assert(auth != maps.end()); + set cur_missing; + set cur_inconsistent; + for (j = maps.begin(); j != maps.end(); ++j) { + if (j == auth) + continue; + if (j->second->objects.count(*k)) { + // Compare + stringstream ss; + enum scrub_error_type error = be_compare_scrub_objects(auth->second->objects[*k], + j->second->objects[*k], + ss); + if (error != CLEAN) { + cur_inconsistent.insert(j->first); + if (error == SHALLOW_ERROR) + ++shallow_errors; + else + ++deep_errors; + errorstream << pgid << " shard " << j->first + << ": soid " << *k << " " << ss.str() << std::endl; + } + } else { + cur_missing.insert(j->first); + ++shallow_errors; + errorstream << pgid << " shard " << j->first + << " missing " << *k << std::endl; + } + } + assert(auth != maps.end()); + if (!cur_missing.empty()) { + missing[*k] = cur_missing; + } + if (!cur_inconsistent.empty()) { + inconsistent[*k] = cur_inconsistent; + } + if (!cur_inconsistent.empty() || !cur_missing.empty()) { + authoritative[*k] = auth->first; + } + } +} diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index fe067effcd9..aecd3ae078b 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -563,17 +563,17 @@ Context *on_complete) = 0; virtual bool scrub_supported() { return false; } - virtual void be_scan_list( + void be_scan_list( ScrubMap &map, const vector &ls, bool deep, - ThreadPool::TPHandle &handle) { assert(0); } - virtual enum scrub_error_type be_compare_scrub_objects( + ThreadPool::TPHandle &handle); + enum scrub_error_type be_compare_scrub_objects( const ScrubMap::object &auth, const ScrubMap::object &candidate, - ostream &errorstream) { assert(0); } - virtual map::const_iterator be_select_auth_object( + ostream &errorstream); + map::const_iterator be_select_auth_object( const hobject_t &obj, - const map &maps) { assert(0); } - virtual void be_compare_scrubmaps( + const map &maps); + void be_compare_scrubmaps( const map &maps, map > &missing, map > &inconsistent, @@ -582,7 +582,12 @@ int &shallow_errors, int &deep_errors, const spg_t pgid, const vector &acting, - ostream &errorstream) { assert(0); } + ostream &errorstream); + + virtual void be_deep_scrub( + const hobject_t &poid, + ScrubMap::object &o, + ThreadPool::TPHandle &handle) { assert(0); } }; struct PG_SendMessageOnConn: public Context { diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 3bb17c540e9..de550db0cbf 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -731,266 +731,3 @@ void ReplicatedBackend::be_deep_scrub( o.omap_digest = oh.digest(); o.omap_digest_present = true; } - -/* - * pg lock may or may not be held - */ -void ReplicatedBackend::be_scan_list( - ScrubMap &map, const vector &ls, bool deep, - ThreadPool::TPHandle &handle) -{ - dout(10) << "_scan_list scanning " << ls.size() << " objects" - << (deep ? " deeply" : "") << dendl; - int i = 0; - for (vector::const_iterator p = ls.begin(); - p != ls.end(); - ++p, i++) { - handle.reset_tp_timeout(); - hobject_t poid = *p; - - struct stat st; - int r = store->stat( - coll, - ghobject_t( - poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - &st, - true); - if (r == 0) { - ScrubMap::object &o = map.objects[poid]; - o.size = st.st_size; - assert(!o.negative); - store->getattrs( - coll, - ghobject_t( - poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - o.attrs); - - // calculate the CRC32 on deep scrubs - if (deep) { - be_deep_scrub(*p, o, handle); - } - - dout(25) << "_scan_list " << poid << dendl; - } else if (r == -ENOENT) { - dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl; - } else if (r == -EIO) { - dout(25) << "_scan_list " << poid << " got " << r << ", read_error" << dendl; - ScrubMap::object &o = map.objects[poid]; - o.read_error = true; - } else { - derr << "_scan_list got: " << cpp_strerror(r) << dendl; - assert(0); - } - } -} - -enum scrub_error_type ReplicatedBackend::be_compare_scrub_objects( - const ScrubMap::object &auth, - const ScrubMap::object &candidate, - ostream &errorstream) -{ - enum scrub_error_type error = CLEAN; - if (candidate.read_error) { - // This can occur on stat() of a shallow scrub, but in that case size will - // be invalid, and this will be over-ridden below. - error = DEEP_ERROR; - errorstream << "candidate had a read error"; - } - if (auth.digest_present && candidate.digest_present) { - if (auth.digest != candidate.digest) { - if (error != CLEAN) - errorstream << ", "; - error = DEEP_ERROR; - - errorstream << "digest " << candidate.digest - << " != known digest " << auth.digest; - } - } - if (auth.omap_digest_present && candidate.omap_digest_present) { - if (auth.omap_digest != candidate.omap_digest) { - if (error != CLEAN) - errorstream << ", "; - error = DEEP_ERROR; - - errorstream << "omap_digest " << candidate.omap_digest - << " != known omap_digest " << auth.omap_digest; - } - } - // Shallow error takes precendence because this will be seen by - // both types of scrubs. - if (auth.size != candidate.size) { - if (error != CLEAN) - errorstream << ", "; - error = SHALLOW_ERROR; - errorstream << "size " << candidate.size - << " != known size " << auth.size; - } - for (map::const_iterator i = auth.attrs.begin(); - i != auth.attrs.end(); - ++i) { - if (!candidate.attrs.count(i->first)) { - if (error != CLEAN) - errorstream << ", "; - error = SHALLOW_ERROR; - errorstream << "missing attr " << i->first; - } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) { - if (error != CLEAN) - errorstream << ", "; - error = SHALLOW_ERROR; - errorstream << "attr value mismatch " << i->first; - } - } - for (map::const_iterator i = candidate.attrs.begin(); - i != candidate.attrs.end(); - ++i) { - if (!auth.attrs.count(i->first)) { - if (error != CLEAN) - errorstream << ", "; - error = SHALLOW_ERROR; - errorstream << "extra attr " << i->first; - } - } - return error; -} - -map::const_iterator - ReplicatedBackend::be_select_auth_object( - const hobject_t &obj, - const map &maps) -{ - map::const_iterator auth = maps.end(); - for (map::const_iterator j = maps.begin(); - j != maps.end(); - ++j) { - map::iterator i = - j->second->objects.find(obj); - if (i == j->second->objects.end()) { - continue; - } - if (auth == maps.end()) { - // Something is better than nothing - // TODO: something is NOT better than nothing, do something like - // unfound_lost if no valid copies can be found, or just mark unfound - auth = j; - dout(10) << __func__ << ": selecting osd " << j->first - << " for obj " << obj - << ", auth == maps.end()" - << dendl; - continue; - } - if (i->second.read_error) { - // scrub encountered read error, probably corrupt - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", read_error" - << dendl; - continue; - } - map::iterator k = i->second.attrs.find(OI_ATTR); - if (k == i->second.attrs.end()) { - // no object info on object, probably corrupt - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", no oi attr" - << dendl; - continue; - } - bufferlist bl; - bl.push_back(k->second); - object_info_t oi; - try { - bufferlist::iterator bliter = bl.begin(); - ::decode(oi, bliter); - } catch (...) { - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", corrupt oi attr" - << dendl; - // invalid object info, probably corrupt - continue; - } - if (oi.size != i->second.size) { - // invalid size, probably corrupt - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", size mismatch" - << dendl; - // invalid object info, probably corrupt - continue; - } - dout(10) << __func__ << ": selecting osd " << j->first - << " for obj " << obj - << dendl; - auth = j; - } - return auth; -} - -void ReplicatedBackend::be_compare_scrubmaps( - const map &maps, - map > &missing, - map > &inconsistent, - map &authoritative, - map > &invalid_snapcolls, - int &shallow_errors, int &deep_errors, - const spg_t pgid, - const vector &acting, - ostream &errorstream) -{ - map::const_iterator i; - map::const_iterator j; - set master_set; - - // Construct master set - for (j = maps.begin(); j != maps.end(); ++j) { - for (i = j->second->objects.begin(); i != j->second->objects.end(); ++i) { - master_set.insert(i->first); - } - } - - // Check maps against master set and each other - for (set::const_iterator k = master_set.begin(); - k != master_set.end(); - ++k) { - map::const_iterator auth = - be_select_auth_object(*k, maps); - assert(auth != maps.end()); - set cur_missing; - set cur_inconsistent; - for (j = maps.begin(); j != maps.end(); ++j) { - if (j == auth) - continue; - if (j->second->objects.count(*k)) { - // Compare - stringstream ss; - enum scrub_error_type error = be_compare_scrub_objects(auth->second->objects[*k], - j->second->objects[*k], - ss); - if (error != CLEAN) { - cur_inconsistent.insert(j->first); - if (error == SHALLOW_ERROR) - ++shallow_errors; - else - ++deep_errors; - errorstream << pgid << " shard " << j->first - << ": soid " << *k << " " << ss.str() << std::endl; - } - } else { - cur_missing.insert(j->first); - ++shallow_errors; - errorstream << pgid << " shard " << j->first - << " missing " << *k << std::endl; - } - } - assert(auth != maps.end()); - if (!cur_missing.empty()) { - missing[*k] = cur_missing; - } - if (!cur_inconsistent.empty()) { - inconsistent[*k] = cur_inconsistent; - } - if (!cur_inconsistent.empty() || !cur_missing.empty()) { - authoritative[*k] = auth->first; - } - } -} diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index 4dca081f7a6..87022bb4632 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -403,26 +403,6 @@ private: void sub_op_modify_commit(RepModifyRef rm); bool scrub_supported() { return true; } - void be_scan_list( - ScrubMap &map, const vector &ls, bool deep, - ThreadPool::TPHandle &handle); - enum scrub_error_type be_compare_scrub_objects( - const ScrubMap::object &auth, - const ScrubMap::object &candidate, - ostream &errorstream); - map::const_iterator be_select_auth_object( - const hobject_t &obj, - const map &maps); - void be_compare_scrubmaps( - const map &maps, - map > &missing, - map > &inconsistent, - map &authoritative, - map > &invalid_snapcolls, - int &shallow_errors, int &deep_errors, - const spg_t pgid, - const vector &acting, - ostream &errorstream); void be_deep_scrub( const hobject_t &obj, ScrubMap::object &o,