Merge pull request #16407 from dzafman/wip-20243

osd: Improve size scrub error handling and ignore system attrs in xattr checking

Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
David Zafman 2017-08-17 20:05:40 -07:00 committed by GitHub
commit 021177b790
9 changed files with 477 additions and 222 deletions

View File

@ -66,7 +66,10 @@
"ec_hash_error",
"ec_size_error",
"oi_attr_missing",
"oi_attr_corrupted"
"oi_attr_corrupted",
"obj_size_oi_mismatch",
"ss_attr_missing",
"ss_attr_corrupted"
]
},
"minItems": 0,
@ -104,6 +107,9 @@
"osd": {
"type": "integer"
},
"primary": {
"type": "boolean"
},
"size": {
"type": "integer"
},
@ -129,7 +135,10 @@
"ec_hash_error",
"ec_size_error",
"oi_attr_missing",
"oi_attr_corrupted"
"oi_attr_corrupted",
"obj_size_oi_mismatch",
"ss_attr_missing",
"ss_attr_corrupted"
]
},
"minItems": 0,
@ -164,6 +173,7 @@
},
"required": [
"osd",
"primary",
"errors"
]
}

File diff suppressed because it is too large Load Diff

View File

@ -70,8 +70,9 @@ void shard_info_wrapper::set_object(const ScrubMap::object& object)
void shard_info_wrapper::encode(bufferlist& bl) const
{
ENCODE_START(2, 1, bl);
ENCODE_START(3, 3, bl);
::encode(errors, bl);
::encode(primary, bl);
if (has_shard_missing()) {
return;
}
@ -87,8 +88,9 @@ void shard_info_wrapper::encode(bufferlist& bl) const
void shard_info_wrapper::decode(bufferlist::iterator& bp)
{
DECODE_START(2, bp);
DECODE_START(3, bp);
::decode(errors, bp);
::decode(primary, bp);
if (has_shard_missing()) {
return;
}
@ -98,8 +100,7 @@ void shard_info_wrapper::decode(bufferlist::iterator& bp)
::decode(omap_digest, bp);
::decode(data_digest_present, bp);
::decode(data_digest, bp);
if (struct_v > 1)
::decode(selected_oi, bp);
::decode(selected_oi, bp);
DECODE_FINISH(bp);
}
@ -120,10 +121,12 @@ void
inconsistent_obj_wrapper::set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>& maps,
map<pg_shard_t, shard_info_wrapper> &shard_map,
int &shallow_errors, int &deep_errors)
int &shallow_errors, int &deep_errors,
const pg_shard_t &primary)
{
for (auto pg_map : maps) {
auto oid_object = pg_map.second->objects.find(hoid);
shard_map[pg_map.first].primary = (pg_map.first == primary);
if (oid_object == pg_map.second->objects.end())
shard_map[pg_map.first].set_missing();
else

View File

@ -78,6 +78,9 @@ public:
void set_ss_attr_corrupted() {
errors |= err_t::SS_ATTR_CORRUPTED;
}
void set_obj_size_oi_mismatch() {
errors |= err_t::OBJ_SIZE_OI_MISMATCH;
}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
};
@ -116,7 +119,8 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
void set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>&,
map<pg_shard_t, shard_info_wrapper>&,
int &shallow_errors, int &deep_errors);
int &shallow_errors, int &deep_errors,
const pg_shard_t &primary);
void set_version(uint64_t ver) { version = ver; }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);

View File

@ -63,11 +63,12 @@ struct err_t {
OI_ATTR_MISSING = 1 << 14,
OI_ATTR_CORRUPTED = 1 << 15,
SS_ATTR_MISSING = 1 << 16,
SS_ATTR_CORRUPTED = 1 << 17
SS_ATTR_CORRUPTED = 1 << 17,
OBJ_SIZE_OI_MISMATCH = 1 << 18
// When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
};
uint64_t errors = 0;
static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED;
static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED|OBJ_SIZE_OI_MISMATCH;
static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH;
bool has_shard_missing() const {
return errors & SHARD_MISSING;
@ -111,6 +112,9 @@ struct err_t {
bool has_deep_errors() const {
return errors & DEEP_ERRORS;
}
bool has_obj_size_oi_mismatch() const {
return errors & OBJ_SIZE_OI_MISMATCH;
}
};
struct shard_info_t : err_t {
@ -121,6 +125,7 @@ struct shard_info_t : err_t {
bool data_digest_present = false;
uint32_t data_digest = 0;
bool selected_oi = false;
bool primary = false;
};
struct osd_shard_t {

View File

@ -5251,7 +5251,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
if (pool < 0 && isdigit(poolstr[0]))
pool = atoll(poolstr.c_str());
if (pool < 0) {
ss << "Invalid pool" << poolstr;
ss << "Invalid pool '" << poolstr << "''";
return;
}

View File

@ -711,6 +711,9 @@ bool PGBackend::be_compare_scrub_objects(
for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
i != auth.attrs.end();
++i) {
// We check system keys seperately
if (i->first == OI_ATTR || i->first == SS_ATTR)
continue;
if (!candidate.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
@ -728,6 +731,9 @@ bool PGBackend::be_compare_scrub_objects(
for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
i != candidate.attrs.end();
++i) {
// We check system keys seperately
if (i->first == OI_ATTR || i->first == SS_ATTR)
continue;
if (!auth.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
@ -758,7 +764,7 @@ map<pg_shard_t, ScrubMap *>::const_iterator
inconsistent_obj_wrapper &object_error)
{
eversion_t auth_version;
bufferlist auth_bl;
bufferlist first_bl;
// Create list of shards with primary last so it will be auth copy all
// other things being equal.
@ -782,6 +788,8 @@ map<pg_shard_t, ScrubMap *>::const_iterator
}
string error_string;
auto& shard_info = shard_map[j->first];
if (j->first == get_parent()->whoami_shard())
shard_info.primary = true;
if (i->second.read_error) {
shard_info.set_read_error();
error_string += " read_error";
@ -809,6 +817,25 @@ map<pg_shard_t, ScrubMap *>::const_iterator
goto out;
}
// We won't pick an auth copy if the snapset is missing or won't decode.
if (obj.is_head() || obj.is_snapdir()) {
k = i->second.attrs.find(SS_ATTR);
if (k == i->second.attrs.end()) {
shard_info.set_ss_attr_missing();
error_string += " ss_attr_missing";
} else {
ss_bl.push_back(k->second);
try {
bufferlist::iterator bliter = ss_bl.begin();
::decode(ss, bliter);
} catch (...) {
// invalid snapset, probably corrupt
shard_info.set_ss_attr_corrupted();
error_string += " ss_attr_corrupted";
}
}
}
k = i->second.attrs.find(OI_ATTR);
if (k == i->second.attrs.end()) {
// no object info on object, probably corrupt
@ -827,48 +854,32 @@ map<pg_shard_t, ScrubMap *>::const_iterator
goto out;
}
if (oi.soid != obj) {
shard_info.set_oi_attr_corrupted();
error_string += " oi_attr_corrupted";
// This is automatically corrected in PG::_repair_oinfo_oid()
assert(oi.soid == obj);
if (first_bl.length() == 0) {
first_bl.append(bl);
} else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_bl)) {
object_error.set_object_info_inconsistency();
error_string += " object_info_inconsistency";
}
if (i->second.size != be_get_ondisk_size(oi.size)) {
dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl;
shard_info.set_obj_size_oi_mismatch();
error_string += " obj_size_oi_mismatch";
}
// Don't use this particular shard due to previous errors
// XXX: For now we can't pick one shard for repair and another's object info or snapset
if (shard_info.errors)
goto out;
}
if (auth_version != eversion_t()) {
if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) {
object_error.set_object_info_inconsistency();
error_string += " object_info_inconsistency";
}
}
// Don't use this particular shard because it won't be able to repair data
// XXX: For now we can't pick one shard for repair and another's object info
if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
goto out;
// We don't set errors here for snapset, but we won't pick an auth copy if the
// snapset is missing or won't decode.
if (obj.is_head() || obj.is_snapdir()) {
k = i->second.attrs.find(SS_ATTR);
if (k == i->second.attrs.end()) {
goto out;
}
ss_bl.push_back(k->second);
try {
bufferlist::iterator bliter = ss_bl.begin();
::decode(ss, bliter);
} catch (...) {
// invalid snapset, probably corrupt
goto out;
}
}
if (auth_version == eversion_t() || oi.version > auth_version ||
(oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
auth = j;
*auth_oi = oi;
auth_version = oi.version;
auth_bl.clear();
auth_bl.append(bl);
}
out:
@ -929,7 +940,8 @@ void PGBackend::be_compare_scrubmaps(
set<pg_shard_t> object_errors;
if (auth == maps.end()) {
object_error.set_version(0);
object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors);
object_error.set_auth_missing(*k, maps, shard_map, shallow_errors,
deep_errors, get_parent()->whoami_shard());
if (object_error.has_deep_errors())
++deep_errors;
else if (object_error.has_shallow_errors())
@ -982,6 +994,7 @@ void PGBackend::be_compare_scrubmaps(
} else {
cur_missing.insert(j->first);
shard_map[j->first].set_missing();
shard_map[j->first].primary = (j->first == get_parent()->whoami_shard());
// Can't have any other errors if there is no information available
++shallow_errors;
errorstream << pgid << " shard " << j->first << " missing " << *k

View File

@ -1983,7 +1983,7 @@ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter
}
int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
ObjectStore::Sequencer &osr)
ObjectStore::Sequencer &osr, bool corrupt)
{
if (ghobj.hobj.is_snapdir()) {
cerr << "Can't set the size of a snapdir" << std::endl;
@ -2060,7 +2060,9 @@ int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsiz
::encode(oi, attr, -1); /* fixme: using full features */
ObjectStore::Transaction t;
t.setattr(coll, ghobj, OI_ATTR, attr);
t.truncate(coll, ghobj, setsize);
// Only modify object info if we want to corrupt it
if (!corrupt)
t.truncate(coll, ghobj, setsize);
if (is_snap) {
bufferlist snapattr;
snapattr.clear();
@ -3396,7 +3398,9 @@ int main(int argc, char **argv)
}
ret = print_obj_info(fs, coll, ghobj, formatter);
goto out;
} else if (objcmd == "set-size") {
} else if (objcmd == "set-size" || objcmd == "corrupt-size") {
// Undocumented testing feature
bool corrupt = (objcmd == "corrupt-size");
// Extra arg
if (vm.count("arg1") == 0 || vm.count("arg2")) {
usage(desc);
@ -3409,7 +3413,7 @@ int main(int argc, char **argv)
goto out;
}
uint64_t size = atoll(arg1.c_str());
ret = set_size(fs, coll, ghobj, size, formatter, *osr);
ret = set_size(fs, coll, ghobj, size, formatter, *osr, corrupt);
goto out;
} else if (objcmd == "clear-snapset") {
// UNDOCUMENTED: For testing zap SnapSet

View File

@ -1338,6 +1338,12 @@ static void dump_errors(const err_t &err, Formatter &f, const char *name)
f.dump_string("error", "oi_attr_missing");
if (err.has_oi_attr_corrupted())
f.dump_string("error", "oi_attr_corrupted");
if (err.has_obj_size_oi_mismatch())
f.dump_string("error", "obj_size_oi_mismatch");
if (err.has_ss_attr_missing())
f.dump_string("error", "ss_attr_missing");
if (err.has_ss_attr_corrupted())
f.dump_string("error", "ss_attr_corrupted");
f.close_section();
}
@ -1369,7 +1375,11 @@ static void dump_shard(const shard_info_t& shard,
::decode(oi, bliter); // Can't be corrupted
f.dump_stream("object_info") << oi;
}
if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) {
if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()
|| inc.union_shards.has_oi_attr_missing()
|| inc.union_shards.has_oi_attr_corrupted()
|| inc.union_shards.has_ss_attr_missing()
|| inc.union_shards.has_ss_attr_corrupted()) {
f.open_array_section("attrs");
for (auto kv : shard.attrs) {
f.open_object_section("attr");
@ -1448,6 +1458,7 @@ static void dump_inconsistent(const inconsistent_obj_t& inc,
f.open_object_section("shard");
auto& osd_shard = shard_info.first;
f.dump_int("osd", osd_shard.osd);
f.dump_bool("primary", shard_info.second.primary);
auto shard = osd_shard.shard;
if (shard != shard_id_t::NO_SHARD)
f.dump_unsigned("shard", shard);