PGBackend/ReplicatedBackend: move the backend-agnostic scrub code into PGBackend

Signed-off-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
Samuel Just 2014-02-08 13:52:28 -08:00
parent 1835c29f6d
commit 26745ca899
4 changed files with 277 additions and 291 deletions

View File

@ -13,6 +13,7 @@
*/
#include "common/errno.h"
#include "PGBackend.h"
#include "OSD.h"
@ -251,3 +252,266 @@ void PGBackend::trim_stashed_object(
t->remove(
coll, ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard));
}
/*
 * Populate map with a scrub entry for each object in ls, read from the
 * local object store.
 *
 * Per object, depending on the result of store->stat():
 *   0        record size and xattrs in map.objects; when deep is set also
 *            run the backend-specific be_deep_scrub() on the entry
 *   -ENOENT  skip the object (no entry is created)
 *   -EIO     create an entry flagged read_error
 *   other    log the error and assert
 *
 * pg lock may or may not be held
 */
void PGBackend::be_scan_list(
  ScrubMap &map, const vector<hobject_t> &ls, bool deep,
  ThreadPool::TPHandle &handle)
{
  dout(10) << "_scan_list scanning " << ls.size() << " objects"
           << (deep ? " deeply" : "") << dendl;
  for (vector<hobject_t>::const_iterator p = ls.begin();
       p != ls.end();
       ++p) {
    // reset the thread pool timeout before working on each object
    handle.reset_tp_timeout();
    const hobject_t &poid = *p;
    // stat and getattrs address the same store object; build the key once
    const ghobject_t goid(
      poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard);
    struct stat st;
    int r = store->stat(coll, goid, &st, true);
    if (r == 0) {
      ScrubMap::object &o = map.objects[poid];
      o.size = st.st_size;
      assert(!o.negative);
      int attr_r = store->getattrs(coll, goid, o.attrs);
      if (attr_r < 0) {
        // The scan proceeds exactly as before; this only makes a failed
        // attribute read visible instead of dropping the result silently.
        dout(10) << __func__ << " " << poid << " getattrs got "
                 << cpp_strerror(attr_r) << dendl;
      }

      // deep scrubs additionally run the backend-specific pass
      if (deep) {
        be_deep_scrub(poid, o, handle);
      }

      dout(25) << "_scan_list  " << poid << dendl;
    } else if (r == -ENOENT) {
      dout(25) << "_scan_list  " << poid << " got " << r << ", skipping" << dendl;
    } else if (r == -EIO) {
      dout(25) << "_scan_list  " << poid << " got " << r << ", read_error" << dendl;
      ScrubMap::object &o = map.objects[poid];
      o.read_error = true;
    } else {
      derr << "_scan_list got: " << cpp_strerror(r) << dendl;
      assert(0);
    }
  }
}
/*
 * Compare one shard's scrub entry (candidate) against the authoritative
 * entry (auth).
 *
 * Every difference found is appended to errorstream, separated by ", ".
 * Returns CLEAN when nothing differs.  Digest and read-error differences
 * yield DEEP_ERROR; size and xattr differences yield SHALLOW_ERROR and
 * override an earlier DEEP_ERROR.
 */
enum scrub_error_type PGBackend::be_compare_scrub_objects(
  const ScrubMap::object &auth,
  const ScrubMap::object &candidate,
  ostream &errorstream)
{
  typedef map<string,bufferptr>::const_iterator attr_iter;
  enum scrub_error_type error = CLEAN;

  if (candidate.read_error) {
    // This can occur on stat() of a shallow scrub, but in that case size will
    // be invalid, and this will be over-ridden below.
    error = DEEP_ERROR;
    errorstream << "candidate had a read error";
  }

  // Data digest: only comparable when both sides computed one.
  if (auth.digest_present && candidate.digest_present &&
      auth.digest != candidate.digest) {
    if (error != CLEAN)
      errorstream << ", ";
    error = DEEP_ERROR;
    errorstream << "digest " << candidate.digest
                << " != known digest " << auth.digest;
  }

  // Omap digest: same rule as the data digest.
  if (auth.omap_digest_present && candidate.omap_digest_present &&
      auth.omap_digest != candidate.omap_digest) {
    if (error != CLEAN)
      errorstream << ", ";
    error = DEEP_ERROR;
    errorstream << "omap_digest " << candidate.omap_digest
                << " != known omap_digest " << auth.omap_digest;
  }

  // Shallow error takes precedence because this will be seen by
  // both types of scrubs.
  if (auth.size != candidate.size) {
    if (error != CLEAN)
      errorstream << ", ";
    error = SHALLOW_ERROR;
    errorstream << "size " << candidate.size
                << " != known size " << auth.size;
  }

  // Every attr of the authoritative copy must exist on the candidate with
  // the same value.
  for (attr_iter a = auth.attrs.begin(); a != auth.attrs.end(); ++a) {
    attr_iter match = candidate.attrs.find(a->first);
    if (match == candidate.attrs.end()) {
      if (error != CLEAN)
        errorstream << ", ";
      error = SHALLOW_ERROR;
      errorstream << "missing attr " << a->first;
    } else if (match->second.cmp(a->second)) {
      if (error != CLEAN)
        errorstream << ", ";
      error = SHALLOW_ERROR;
      errorstream << "attr value mismatch " << a->first;
    }
  }

  // ...and the candidate must not carry attrs the authoritative copy lacks.
  for (attr_iter c = candidate.attrs.begin(); c != candidate.attrs.end(); ++c) {
    if (auth.attrs.find(c->first) != auth.attrs.end())
      continue;
    if (error != CLEAN)
      errorstream << ", ";
    error = SHALLOW_ERROR;
    errorstream << "extra attr " << c->first;
  }

  return error;
}
/*
 * Pick the shard whose copy of obj the other shards will be compared to.
 *
 * The first shard (in maps order) that has obj at all is taken
 * unconditionally as a fallback.  Each later shard replaces the current
 * choice when its copy passes every check: no read error, an OI_ATTR
 * xattr that decodes as object_info_t, and a decoded size equal to the
 * scanned size.  Returns maps.end() when no shard has the object.
 */
map<pg_shard_t, ScrubMap *>::const_iterator
  PGBackend::be_select_auth_object(
  const hobject_t &obj,
  const map<pg_shard_t,ScrubMap*> &maps)
{
  typedef map<pg_shard_t, ScrubMap *>::const_iterator shard_iter;

  shard_iter auth = maps.end();
  for (shard_iter shard = maps.begin(); shard != maps.end(); ++shard) {
    map<hobject_t, ScrubMap::object>::iterator entry =
      shard->second->objects.find(obj);
    if (entry == shard->second->objects.end())
      continue;  // this shard does not have the object at all

    if (auth == maps.end()) {
      // Something is better than nothing
      // TODO: something is NOT better than nothing, do something like
      // unfound_lost if no valid copies can be found, or just mark unfound
      auth = shard;
      dout(10) << __func__ << ": selecting osd " << shard->first
               << " for obj " << obj
               << ", auth == maps.end()"
               << dendl;
      continue;
    }

    // Run the validity checks in order; the first failure names the reason.
    const char *reject_reason = NULL;
    if (entry->second.read_error) {
      // scrub encountered read error, probably corrupt
      reject_reason = ", read_error";
    } else {
      map<string, bufferptr>::iterator oi_attr =
        entry->second.attrs.find(OI_ATTR);
      if (oi_attr == entry->second.attrs.end()) {
        // no object info on object, probably corrupt
        reject_reason = ", no oi attr";
      } else {
        bufferlist bl;
        bl.push_back(oi_attr->second);
        object_info_t oi;
        try {
          bufferlist::iterator bliter = bl.begin();
          ::decode(oi, bliter);
        } catch (...) {
          // invalid object info, probably corrupt
          reject_reason = ", corrupt oi attr";
        }
        if (!reject_reason && oi.size != entry->second.size) {
          // invalid size, probably corrupt
          reject_reason = ", size mismatch";
        }
      }
    }

    if (reject_reason) {
      dout(10) << __func__ << ": rejecting osd " << shard->first
               << " for obj " << obj
               << reject_reason
               << dendl;
      continue;
    }

    dout(10) << __func__ << ": selecting osd " << shard->first
             << " for obj " << obj
             << dendl;
    auth = shard;
  }
  return auth;
}
void PGBackend::be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
map<hobject_t, set<pg_shard_t> > &missing,
map<hobject_t, set<pg_shard_t> > &inconsistent,
map<hobject_t, pg_shard_t> &authoritative,
map<hobject_t, set<pg_shard_t> > &invalid_snapcolls,
int &shallow_errors, int &deep_errors,
const spg_t pgid,
const vector<int> &acting,
ostream &errorstream)
{
map<hobject_t,ScrubMap::object>::const_iterator i;
map<pg_shard_t, ScrubMap *>::const_iterator j;
set<hobject_t> master_set;
// Construct master set
for (j = maps.begin(); j != maps.end(); ++j) {
for (i = j->second->objects.begin(); i != j->second->objects.end(); ++i) {
master_set.insert(i->first);
}
}
// Check maps against master set and each other
for (set<hobject_t>::const_iterator k = master_set.begin();
k != master_set.end();
++k) {
map<pg_shard_t, ScrubMap *>::const_iterator auth =
be_select_auth_object(*k, maps);
assert(auth != maps.end());
set<pg_shard_t> cur_missing;
set<pg_shard_t> cur_inconsistent;
for (j = maps.begin(); j != maps.end(); ++j) {
if (j == auth)
continue;
if (j->second->objects.count(*k)) {
// Compare
stringstream ss;
enum scrub_error_type error = be_compare_scrub_objects(auth->second->objects[*k],
j->second->objects[*k],
ss);
if (error != CLEAN) {
cur_inconsistent.insert(j->first);
if (error == SHALLOW_ERROR)
++shallow_errors;
else
++deep_errors;
errorstream << pgid << " shard " << j->first
<< ": soid " << *k << " " << ss.str() << std::endl;
}
} else {
cur_missing.insert(j->first);
++shallow_errors;
errorstream << pgid << " shard " << j->first
<< " missing " << *k << std::endl;
}
}
assert(auth != maps.end());
if (!cur_missing.empty()) {
missing[*k] = cur_missing;
}
if (!cur_inconsistent.empty()) {
inconsistent[*k] = cur_inconsistent;
}
if (!cur_inconsistent.empty() || !cur_missing.empty()) {
authoritative[*k] = auth->first;
}
}
}

View File

@ -563,17 +563,17 @@
Context *on_complete) = 0;
/// Reports whether scrub is supported; this default implementation returns false.
virtual bool scrub_supported() { return false; }
virtual void be_scan_list(
void be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep,
ThreadPool::TPHandle &handle) { assert(0); }
virtual enum scrub_error_type be_compare_scrub_objects(
ThreadPool::TPHandle &handle);
enum scrub_error_type be_compare_scrub_objects(
const ScrubMap::object &auth,
const ScrubMap::object &candidate,
ostream &errorstream) { assert(0); }
virtual map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
ostream &errorstream);
map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
const hobject_t &obj,
const map<pg_shard_t,ScrubMap*> &maps) { assert(0); }
virtual void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps);
void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
map<hobject_t, set<pg_shard_t> > &missing,
map<hobject_t, set<pg_shard_t> > &inconsistent,
@ -582,7 +582,12 @@
int &shallow_errors, int &deep_errors,
const spg_t pgid,
const vector<int> &acting,
ostream &errorstream) { assert(0); }
ostream &errorstream);
/// Per-object deep-scrub hook, invoked by be_scan_list() for each scanned
/// object when a deep scrub is requested. This default implementation
/// asserts unconditionally.
virtual void be_deep_scrub(
const hobject_t &poid,
ScrubMap::object &o,
ThreadPool::TPHandle &handle) { assert(0); }
};
struct PG_SendMessageOnConn: public Context {

View File

@ -731,266 +731,3 @@ void ReplicatedBackend::be_deep_scrub(
o.omap_digest = oh.digest();
o.omap_digest_present = true;
}
/*
 * Fill map with a scrub entry for each object in ls, read from the local
 * object store. stat() result handling: 0 records size and xattrs (plus
 * be_deep_scrub() when deep is set), -ENOENT skips the object, -EIO
 * creates an entry flagged read_error, anything else is logged and asserts.
 *
 * pg lock may or may not be held
 */
void ReplicatedBackend::be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep,
ThreadPool::TPHandle &handle)
{
dout(10) << "_scan_list scanning " << ls.size() << " objects"
<< (deep ? " deeply" : "") << dendl;
// NOTE: i is only incremented below, its value is never read
int i = 0;
for (vector<hobject_t>::const_iterator p = ls.begin();
p != ls.end();
++p, i++) {
handle.reset_tp_timeout();
hobject_t poid = *p;
struct stat st;
int r = store->stat(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
&st,
true);
if (r == 0) {
ScrubMap::object &o = map.objects[poid];
o.size = st.st_size;
assert(!o.negative);
// NOTE: the return value of getattrs is not checked here
store->getattrs(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
o.attrs);
// calculate the CRC32 on deep scrubs
if (deep) {
be_deep_scrub(*p, o, handle);
}
dout(25) << "_scan_list " << poid << dendl;
} else if (r == -ENOENT) {
// object is absent locally: no entry is created for it
dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl;
} else if (r == -EIO) {
// keep an entry so the read error is visible to the comparison step
dout(25) << "_scan_list " << poid << " got " << r << ", read_error" << dendl;
ScrubMap::object &o = map.objects[poid];
o.read_error = true;
} else {
derr << "_scan_list got: " << cpp_strerror(r) << dendl;
assert(0);
}
}
}
/*
 * Compare a shard's scrub entry (candidate) with the authoritative entry
 * (auth). Each difference is appended to errorstream, separated by ", ".
 * Returns CLEAN when nothing differs; read-error and digest differences
 * give DEEP_ERROR, size and xattr differences give SHALLOW_ERROR and
 * override an earlier DEEP_ERROR.
 */
enum scrub_error_type ReplicatedBackend::be_compare_scrub_objects(
const ScrubMap::object &auth,
const ScrubMap::object &candidate,
ostream &errorstream)
{
enum scrub_error_type error = CLEAN;
if (candidate.read_error) {
// This can occur on stat() of a shallow scrub, but in that case size will
// be invalid, and this will be over-ridden below.
error = DEEP_ERROR;
errorstream << "candidate had a read error";
}
// digests are only compared when both sides have one
if (auth.digest_present && candidate.digest_present) {
if (auth.digest != candidate.digest) {
if (error != CLEAN)
errorstream << ", ";
error = DEEP_ERROR;
errorstream << "digest " << candidate.digest
<< " != known digest " << auth.digest;
}
}
if (auth.omap_digest_present && candidate.omap_digest_present) {
if (auth.omap_digest != candidate.omap_digest) {
if (error != CLEAN)
errorstream << ", ";
error = DEEP_ERROR;
errorstream << "omap_digest " << candidate.omap_digest
<< " != known omap_digest " << auth.omap_digest;
}
}
// Shallow error takes precedence because this will be seen by
// both types of scrubs.
if (auth.size != candidate.size) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
errorstream << "size " << candidate.size
<< " != known size " << auth.size;
}
// every attr on auth must be present on candidate with an equal value
for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
i != auth.attrs.end();
++i) {
if (!candidate.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
errorstream << "missing attr " << i->first;
} else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
errorstream << "attr value mismatch " << i->first;
}
}
// attrs present on candidate but not on auth are reported as extra
for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
i != candidate.attrs.end();
++i) {
if (!auth.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
errorstream << "extra attr " << i->first;
}
}
return error;
}
/*
 * Select the shard whose copy of obj is used as the reference.
 *
 * The first shard in maps that has obj is taken without validation as a
 * fallback; each later shard replaces the current choice if its entry has
 * no read error, carries an OI_ATTR that decodes as object_info_t, and the
 * decoded size equals the scanned size. Returns maps.end() if no shard has
 * the object.
 */
map<pg_shard_t, ScrubMap *>::const_iterator
ReplicatedBackend::be_select_auth_object(
const hobject_t &obj,
const map<pg_shard_t,ScrubMap*> &maps)
{
map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
j != maps.end();
++j) {
map<hobject_t, ScrubMap::object>::iterator i =
j->second->objects.find(obj);
if (i == j->second->objects.end()) {
// shard does not have the object
continue;
}
if (auth == maps.end()) {
// Something is better than nothing
// TODO: something is NOT better than nothing, do something like
// unfound_lost if no valid copies can be found, or just mark unfound
auth = j;
dout(10) << __func__ << ": selecting osd " << j->first
<< " for obj " << obj
<< ", auth == maps.end()"
<< dendl;
continue;
}
if (i->second.read_error) {
// scrub encountered read error, probably corrupt
dout(10) << __func__ << ": rejecting osd " << j->first
<< " for obj " << obj
<< ", read_error"
<< dendl;
continue;
}
map<string, bufferptr>::iterator k = i->second.attrs.find(OI_ATTR);
if (k == i->second.attrs.end()) {
// no object info on object, probably corrupt
dout(10) << __func__ << ": rejecting osd " << j->first
<< " for obj " << obj
<< ", no oi attr"
<< dendl;
continue;
}
bufferlist bl;
bl.push_back(k->second);
object_info_t oi;
try {
bufferlist::iterator bliter = bl.begin();
::decode(oi, bliter);
} catch (...) {
dout(10) << __func__ << ": rejecting osd " << j->first
<< " for obj " << obj
<< ", corrupt oi attr"
<< dendl;
// invalid object info, probably corrupt
continue;
}
if (oi.size != i->second.size) {
// invalid size, probably corrupt
dout(10) << __func__ << ": rejecting osd " << j->first
<< " for obj " << obj
<< ", size mismatch"
<< dendl;
// invalid object info, probably corrupt
continue;
}
// all checks passed: this shard replaces the current choice
dout(10) << __func__ << ": selecting osd " << j->first
<< " for obj " << obj
<< dendl;
auth = j;
}
return auth;
}
/*
 * Cross-check the scrub maps of all shards.
 *
 * For each object seen on any shard, an authoritative shard is chosen via
 * be_select_auth_object(); every other shard is recorded in missing (object
 * absent, counted as a shallow error) or compared via
 * be_compare_scrub_objects() and, on a difference, recorded in inconsistent
 * with shallow_errors / deep_errors incremented. authoritative is filled
 * only for objects with at least one problem. One line per problem is
 * written to errorstream.
 *
 * invalid_snapcolls and acting are not referenced in this body.
 */
void ReplicatedBackend::be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
map<hobject_t, set<pg_shard_t> > &missing,
map<hobject_t, set<pg_shard_t> > &inconsistent,
map<hobject_t, pg_shard_t> &authoritative,
map<hobject_t, set<pg_shard_t> > &invalid_snapcolls,
int &shallow_errors, int &deep_errors,
const spg_t pgid,
const vector<int> &acting,
ostream &errorstream)
{
map<hobject_t,ScrubMap::object>::const_iterator i;
map<pg_shard_t, ScrubMap *>::const_iterator j;
set<hobject_t> master_set;
// Construct master set
for (j = maps.begin(); j != maps.end(); ++j) {
for (i = j->second->objects.begin(); i != j->second->objects.end(); ++i) {
master_set.insert(i->first);
}
}
// Check maps against master set and each other
for (set<hobject_t>::const_iterator k = master_set.begin();
k != master_set.end();
++k) {
map<pg_shard_t, ScrubMap *>::const_iterator auth =
be_select_auth_object(*k, maps);
// every object in master_set exists on at least one shard
assert(auth != maps.end());
set<pg_shard_t> cur_missing;
set<pg_shard_t> cur_inconsistent;
for (j = maps.begin(); j != maps.end(); ++j) {
if (j == auth)
continue;
if (j->second->objects.count(*k)) {
// Compare
stringstream ss;
enum scrub_error_type error = be_compare_scrub_objects(auth->second->objects[*k],
j->second->objects[*k],
ss);
if (error != CLEAN) {
cur_inconsistent.insert(j->first);
if (error == SHALLOW_ERROR)
++shallow_errors;
else
++deep_errors;
errorstream << pgid << " shard " << j->first
<< ": soid " << *k << " " << ss.str() << std::endl;
}
} else {
// object absent on this shard: counted as a shallow error
cur_missing.insert(j->first);
++shallow_errors;
errorstream << pgid << " shard " << j->first
<< " missing " << *k << std::endl;
}
}
// NOTE: repeats the assert above; auth is not modified in between
assert(auth != maps.end());
if (!cur_missing.empty()) {
missing[*k] = cur_missing;
}
if (!cur_inconsistent.empty()) {
inconsistent[*k] = cur_inconsistent;
}
// only objects with a problem get an authoritative entry
if (!cur_inconsistent.empty() || !cur_missing.empty()) {
authoritative[*k] = auth->first;
}
}
}

View File

@ -403,26 +403,6 @@ private:
void sub_op_modify_commit(RepModifyRef rm);
/// Reports whether scrub is supported; this implementation returns true.
bool scrub_supported() { return true; }
/// Builds scrub entries in map for the objects listed in ls; deep requests
/// the additional per-object deep pass.
void be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep,
ThreadPool::TPHandle &handle);
/// Compares candidate against auth, describing differences on errorstream.
enum scrub_error_type be_compare_scrub_objects(
const ScrubMap::object &auth,
const ScrubMap::object &candidate,
ostream &errorstream);
/// Returns the entry of maps chosen as reference for obj, or maps.end().
map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
const hobject_t &obj,
const map<pg_shard_t,ScrubMap*> &maps);
/// Cross-checks all shard maps, filling the output maps and error counters.
void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
map<hobject_t, set<pg_shard_t> > &missing,
map<hobject_t, set<pg_shard_t> > &inconsistent,
map<hobject_t, pg_shard_t> &authoritative,
map<hobject_t, set<pg_shard_t> > &invalid_snapcolls,
int &shallow_errors, int &deep_errors,
const spg_t pgid,
const vector<int> &acting,
ostream &errorstream);
void be_deep_scrub(
const hobject_t &obj,
ScrubMap::object &o,