Merge pull request #10863 from xiexingguo/xxg-wip-fix-bfs-08-25

os/bluestore/bluefs: add file refs check

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-08-29 08:59:44 -05:00 committed by GitHub
commit 96c4bfed3c
2 changed files with 11 additions and 313 deletions

View File

@ -670,6 +670,7 @@ int BlueFS::_replay(bool noop)
assert(q != dir_map.end());
map<string,FileRef>::iterator r = q->second->file_map.find(filename);
assert(r != q->second->file_map.end());
assert(r->second->refs > 0);
--r->second->refs;
q->second->file_map.erase(r);
}
@ -788,6 +789,7 @@ void BlueFS::_drop_link(FileRef file)
{
dout(20) << __func__ << " had refs " << file->refs
<< " on " << file->fnode << dendl;
assert(file->refs > 0);
--file->refs;
if (file->refs == 0) {
dout(20) << __func__ << " destroying " << file->fnode << dendl;
@ -2007,10 +2009,10 @@ int BlueFS::lock_file(const string& dirname, const string& filename,
log_t.op_dir_link(dirname, filename, file->fnode.ino);
} else {
file = q->second.get();
}
if (file->locked) {
dout(10) << __func__ << " already locked" << dendl;
return -EBUSY;
if (file->locked) {
dout(10) << __func__ << " already locked" << dendl;
return -EBUSY;
}
}
file->locked = true;
*plock = new FileLock(file);
@ -2074,6 +2076,11 @@ int BlueFS::unlink(const string& dirname, const string& filename)
return -ENOENT;
}
FileRef file = q->second;
if (file->locked) {
dout(20) << __func__ << " file " << dirname << "/" << filename
<< " is locked" << dendl;
return -EBUSY;
}
dir->file_map.erase(filename);
log_t.op_dir_unlink(dirname, filename);
_drop_link(file);

View File

@ -1047,315 +1047,6 @@ int KStore::fsck()
{
dout(1) << __func__ << dendl;
int errors = 0;
#if 0
set<uint64_t> used_nids;
set<uint64_t> used_omap_head;
interval_set<uint64_t> used_blocks;
KeyValueDB::Iterator it;
int r = _open_path();
if (r < 0)
return r;
r = _open_fsid(false);
if (r < 0)
goto out_path;
r = _read_fsid(&fsid);
if (r < 0)
goto out_fsid;
r = _lock_fsid();
if (r < 0)
goto out_fsid;
r = _open_bdev(false);
if (r < 0)
goto out_fsid;
r = _open_db(false);
if (r < 0)
goto out_bdev;
r = _open_alloc();
if (r < 0)
goto out_db;
r = _open_super_meta();
if (r < 0)
goto out_alloc;
r = _open_collections(&errors);
if (r < 0)
goto out_alloc;
if (bluefs) {
used_blocks.insert(0, BLUEFS_START);
used_blocks.insert(bluefs_extents);
r = bluefs->fsck();
if (r < 0)
goto out_alloc;
if (r > 0)
errors += r;
}
// walk collections, objects
for (ceph::unordered_map<coll_t, CollectionRef>::iterator p = coll_map.begin();
p != coll_map.end() && !errors;
++p) {
dout(1) << __func__ << " collection " << p->first << dendl;
CollectionRef c = _get_collection(p->first);
RWLock::RLocker l(c->lock);
ghobject_t pos;
while (!errors) {
vector<ghobject_t> ols;
int r = collection_list(p->first, pos, ghobject_t::get_max(), true,
100, &ols, &pos);
if (r < 0) {
++errors;
break;
}
if (ols.empty()) {
break;
}
for (auto& oid : ols) {
dout(10) << __func__ << " " << oid << dendl;
OnodeRef o = c->get_onode(oid, false);
if (!o || !o->exists) {
++errors;
break;
}
if (o->onode.nid) {
if (used_nids.count(o->onode.nid)) {
derr << " " << oid << " nid " << o->onode.nid << " already in use"
<< dendl;
++errors;
break;
}
used_nids.insert(o->onode.nid);
}
// blocks
for (auto& b : o->onode.block_map) {
if (used_blocks.contains(b.second.offset, b.second.length)) {
derr << " " << oid << " extent " << b.first << ": " << b.second
<< " already allocated" << dendl;
++errors;
continue;
}
used_blocks.insert(b.second.offset, b.second.length);
if (b.second.end() > bdev->get_size()) {
derr << " " << oid << " extent " << b.first << ": " << b.second
<< " past end of block device" << dendl;
++errors;
}
}
// overlays
set<string> overlay_keys;
map<uint64_t,int> refs;
for (auto& v : o->onode.overlay_map) {
if (v.first + v.second.length > o->onode.size) {
derr << " " << oid << " overlay " << v.first << " " << v.second
<< " extends past end of object" << dendl;
++errors;
}
if (v.second.key > o->onode.last_overlay_key) {
derr << " " << oid << " overlay " << v.first << " " << v.second
<< " is > last_overlay_key " << o->onode.last_overlay_key
<< dendl;
++errors;
}
++refs[v.second.key];
string key;
bufferlist val;
get_overlay_key(o->onode.nid, v.second.key, &key);
overlay_keys.insert(key);
int r = db->get(PREFIX_OVERLAY, key, &val);
if (r < 0) {
derr << " " << oid << " overlay " << v.first << " " << v.second
<< " failed to fetch: " << cpp_strerror(r) << dendl;
++errors;
}
if (val.length() < v.second.value_offset + v.second.length) {
derr << " " << oid << " overlay " << v.first << " " << v.second
<< " too short, " << val.length() << dendl;
++errors;
}
}
for (auto& vr : o->onode.overlay_refs) {
if (refs[vr.first] != vr.second) {
derr << " " << oid << " overlay key " << vr.first
<< " says " << vr.second << " refs but we have "
<< refs[vr.first] << dendl;
++errors;
}
refs.erase(vr.first);
}
for (auto& p : refs) {
if (p.second > 1) {
derr << " " << oid << " overlay key " << p.first
<< " has " << p.second << " refs but they are not recorded"
<< dendl;
++errors;
}
}
do {
string start;
get_overlay_key(o->onode.nid, 0, &start);
KeyValueDB::Iterator it = db->get_iterator(PREFIX_OVERLAY);
if (!it)
break;
for (it->lower_bound(start); it->valid(); it->next()) {
string k = it->key();
const char *p = k.c_str();
uint64_t nid;
p = _key_decode_u64(p, &nid);
if (nid != o->onode.nid)
break;
if (!overlay_keys.count(k)) {
derr << " " << oid << " has stray overlay kv pair for "
<< k << dendl;
++errors;
}
}
} while (false);
// omap
while (o->onode.omap_head) {
if (used_omap_head.count(o->onode.omap_head)) {
derr << " " << oid << " omap_head " << o->onode.omap_head
<< " already in use" << dendl;
++errors;
break;
}
used_omap_head.insert(o->onode.omap_head);
// hrm, scan actual key/value pairs?
KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP);
if (!it)
break;
string head, tail;
get_omap_header(o->onode.omap_head, &head);
get_omap_tail(o->onode.omap_head, &tail);
it->lower_bound(head);
while (it->valid()) {
if (it->key() == head) {
dout(30) << __func__ << " got header" << dendl;
} else if (it->key() >= tail) {
dout(30) << __func__ << " reached tail" << dendl;
break;
} else {
string user_key;
decode_omap_key(it->key(), &user_key);
dout(30) << __func__
<< " got " << pretty_binary_string(it->key())
<< " -> " << user_key << dendl;
assert(it->key() < tail);
}
it->next();
}
break;
}
}
}
}
dout(1) << __func__ << " checking for stray objects" << dendl;
it = db->get_iterator(PREFIX_OBJ);
if (it) {
CollectionRef c;
for (it->lower_bound(string()); it->valid(); it->next()) {
ghobject_t oid;
int r = get_key_object(it->key(), &oid);
if (r < 0) {
dout(30) << __func__ << " bad object key "
<< pretty_binary_string(it->key()) << dendl;
++errors;
continue;
}
if (!c || !c->contains(oid)) {
c = NULL;
for (ceph::unordered_map<coll_t, CollectionRef>::iterator p =
coll_map.begin();
p != coll_map.end() && !errors;
++p) {
if (p->second->contains(oid)) {
c = p->second;
break;
}
}
if (!c) {
dout(30) << __func__ << " stray object " << oid
<< " not owned by any collection" << dendl;
++errors;
continue;
}
}
}
}
dout(1) << __func__ << " checking for stray overlay data" << dendl;
it = db->get_iterator(PREFIX_OVERLAY);
if (it) {
for (it->lower_bound(string()); it->valid(); it->next()) {
string key = it->key();
const char *p = key.c_str();
uint64_t nid;
p = _key_decode_u64(p, &nid);
if (used_nids.count(nid) == 0) {
derr << __func__ << " found stray overlay data on nid " << nid << dendl;
++errors;
}
}
}
dout(1) << __func__ << " checking for stray omap data" << dendl;
it = db->get_iterator(PREFIX_OMAP);
if (it) {
for (it->lower_bound(string()); it->valid(); it->next()) {
string key = it->key();
const char *p = key.c_str();
uint64_t omap_head;
p = _key_decode_u64(p, &omap_head);
if (used_omap_head.count(omap_head) == 0) {
derr << __func__ << " found stray omap data on omap_head " << omap_head
<< dendl;
++errors;
}
}
}
dout(1) << __func__ << " checking freelist vs allocated" << dendl;
{
const map<uint64_t,uint64_t>& free = fm->get_freelist();
for (map<uint64_t,uint64_t>::const_iterator p = free.begin();
p != free.end(); ++p) {
if (used_blocks.contains(p->first, p->second)) {
derr << __func__ << " free extent " << p->first << "~" << p->second
<< " intersects allocated blocks" << dendl;
++errors;
continue;
}
used_blocks.insert(p->first, p->second);
}
if (!used_blocks.contains(0, bdev->get_size())) {
derr << __func__ << " leaked some space; free+used = "
<< used_blocks
<< " != expected 0~" << bdev->get_size()
<< dendl;
++errors;
}
}
coll_map.clear();
out_alloc:
_close_alloc();
out_db:
it.reset(); // before db is closed
_close_db();
out_bdev:
_close_bdev();
out_fsid:
_close_fsid();
out_path:
_close_path();
#endif
dout(1) << __func__ << " finish with " << errors << " errors" << dendl;
return errors;
}