* fixed subtree_map metablob bug (some bounds were left out)

* fixed replay bug when an importstart/importfinish pair spans a subtree_map


git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1490 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
sageweil 2007-07-12 22:48:56 +00:00
parent eb2abe084f
commit d820c28526
6 changed files with 102 additions and 57 deletions

View File

@ -59,15 +59,13 @@ sage mds
/ - clean up remove_gather() crap
- add_strong_* should take the cache object?
/ - all replicated scatterlocks should start out in scatter state.
- parallel_fetch
/ - parallel_fetch
- missing/full
- carefully document rejoin
- cases
- confounding factors
- client sessions vs cap migration.
- sessions need incarnation to avoid close races.
- mds->client session open on cap import, if needed.

View File

@ -512,7 +512,7 @@ int Client::choose_target_mds(MClientRequest *req)
// pick mds
if (!diri || g_conf.client_use_random_mds) {
// no root info, pick a random MDS
mds = rand() % mdsmap->get_num_mds();
mds = 0;//rand() % mdsmap->get_num_mds();
} else {
if (req->auth_is_best()) {
// pick the actual auth (as best we can)

View File

@ -907,8 +907,9 @@ void MDCache::log_subtree_map(Context *onsync)
CDir *dir = p->first;
if (!dir->is_auth()) continue;
dout(15) << " subtree " << *dir << endl;
le->subtrees[dir->dirfrag()].clear();
le->metablob.add_dir_context(dir, true);
le->metablob.add_dir_context(dir, EMetaBlob::TO_ROOT);
le->metablob.add_dir(dir, false);
// bounds
@ -916,12 +917,15 @@ void MDCache::log_subtree_map(Context *onsync)
q != p->second.end();
++q) {
CDir *bound = *q;
dout(15) << " subtree bound " << *bound << endl;
le->subtrees[dir->dirfrag()].push_back(bound->dirfrag());
le->metablob.add_dir_context(bound);
le->metablob.add_dir_context(bound, EMetaBlob::TO_ROOT);
le->metablob.add_dir(bound, false);
}
}
//le->metablob.print(cout);
Context *fin = new C_MDS_WroteSubtreeMap(this, mds->mdlog->get_write_pos());
mds->mdlog->writing_subtree_map = true;
mds->mdlog->submit_entry(le);

View File

@ -331,6 +331,9 @@ public:
// ambiguous imports
void add_ambiguous_import(dirfrag_t base, list<dirfrag_t>& bounds);
void add_ambiguous_import(CDir *base, const set<CDir*>& bounds);
// Report whether `base` currently has an entry in my_ambiguous_imports.
bool have_ambiguous_import(dirfrag_t base) {
  const bool present = my_ambiguous_imports.count(base) != 0;
  return present;
}
void cancel_ambiguous_import(dirfrag_t dirino);
void finish_ambiguous_import(dirfrag_t dirino);
void send_resolve(int who);

View File

@ -73,6 +73,11 @@ class EMetaBlob {
bl.copy(off, sizeof(dirty), (char*)&dirty);
off += sizeof(dirty);
}
// Write a one-line summary of this fullbit (dentry name, dentry version,
// inode number and dirty flag) to `out`.
void print(ostream& out) {
  out << " fullbit dn " << dn;
  out << " dnv " << dnv;
  out << " inode " << inode.ino;
  out << " dirty=" << dirty;
  out << endl;
}
};
/* remotebit - a dentry + remote inode link (i.e. just an ino)
@ -100,6 +105,11 @@ class EMetaBlob {
bl.copy(off, sizeof(dirty), (char*)&dirty);
off += sizeof(dirty);
}
// Write a one-line summary of this remotebit (dentry name, dentry version,
// linked ino and dirty flag) to `out`.
void print(ostream& out) {
  out << " remotebit dn " << dn;
  out << " dnv " << dnv;
  out << " ino " << ino;
  out << " dirty=" << dirty;
  out << endl;
}
};
/*
@ -123,6 +133,10 @@ class EMetaBlob {
bl.copy(off, sizeof(dirty), (char*)&dirty);
off += sizeof(dirty);
}
// Write a one-line summary of this nullbit (dentry name, dentry version and
// dirty flag) to `out`.
void print(ostream& out) {
  out << " nullbit dn " << dn;
  out << " dnv " << dnv;
  out << " dirty=" << dirty;
  out << endl;
}
};
@ -132,13 +146,12 @@ class EMetaBlob {
static const int STATE_COMPLETE = (1<<1);
static const int STATE_DIRTY = (1<<2); // dirty due to THIS journal item, that is!
dirfrag_t dirfrag;
version_t dirv;
int state;
int nfull, nremote, nnull;
bufferlist bfull, bremote, bnull;
private:
bufferlist dnbl;
bool dn_decoded;
list<fullbit> dfull;
list<remotebit> dremote;
@ -156,52 +169,57 @@ class EMetaBlob {
list<remotebit> &get_dremote() { return dremote; }
list<nullbit> &get_dnull() { return dnull; }
// Dump this dirlump to `out`: first a header line with the dirfrag, dirv,
// state and the full/remote/null counts, then every full, remote and null
// bit in that order.
void print(dirfrag_t dirfrag, ostream& out) {
  out << "dirlump " << dirfrag;
  out << " dirv " << dirv;
  out << " state " << state;
  out << " num " << nfull << "/" << nremote << "/" << nnull;
  out << endl;

  // Make sure the dentry bits have been decoded before walking the lists.
  _decode_bits();

  list<fullbit>::iterator fi = dfull.begin();
  while (fi != dfull.end()) {
    fi->print(out);
    ++fi;
  }

  list<remotebit>::iterator ri = dremote.begin();
  while (ri != dremote.end()) {
    ri->print(out);
    ++ri;
  }

  list<nullbit>::iterator ni = dnull.begin();
  while (ni != dnull.end()) {
    ni->print(out);
    ++ni;
  }
}
// Serialize every element of dfull, dremote and dnull (in that order) by
// calling the element's own _encode().
// NOTE(review): each loop below is followed by two calls, one targeting
// bfull/bremote/bnull and one targeting dnbl. This listing appears to show the
// pre-change and post-change lines of a diff together without +/- markers, so
// presumably only one call per loop exists in the real file -- confirm.
void _encode_bits() {
for (list<fullbit>::iterator p = dfull.begin(); p != dfull.end(); ++p)
p->_encode(bfull);
p->_encode(dnbl);
for (list<remotebit>::iterator p = dremote.begin(); p != dremote.end(); ++p)
p->_encode(bremote);
p->_encode(dnbl);
for (list<nullbit>::iterator p = dnull.begin(); p != dnull.end(); ++p)
p->_encode(bnull);
p->_encode(dnbl);
}
// Rebuild dfull, dremote and dnull from their encoded form, constructing
// nfull, nremote and nnull elements respectively. Does nothing if dn_decoded
// is already set; sets dn_decoded when finished.
// NOTE(review): the lines reading from bfull/bremote/bnull (with `off = 0`
// resets between them) and the lines reading from dnbl look like the old and
// new sides of a diff shown together without +/- markers -- confirm which set
// is live in the real file.
void _decode_bits() {
if (dn_decoded) return;
int off = 0;
for (int i=0; i<nfull; i++)
dfull.push_back(fullbit(bfull, off));
off = 0;
dfull.push_back(fullbit(dnbl, off));
for (int i=0; i<nremote; i++)
dremote.push_back(remotebit(bremote, off));
off = 0;
dremote.push_back(remotebit(dnbl, off));
for (int i=0; i<nnull; i++)
dnull.push_back(nullbit(bnull, off));
dnull.push_back(nullbit(dnbl, off));
dn_decoded = true;
}
// Append this dirlump to `bl`: the header fields first, then the encoded
// dentry bits produced by _encode_bits().
// NOTE(review): the raw bl.append(...) lines and the ::_encode(...) lines
// cover overlapping fields, and both the bfull/bremote/bnull and dnbl buffers
// are written. This looks like the removed and added sides of a diff rendered
// together without +/- markers -- confirm against the real file.
void _encode(bufferlist& bl) {
bl.append((char*)&dirfrag, sizeof(dirfrag));
bl.append((char*)&dirv, sizeof(dirv));
bl.append((char*)&state, sizeof(state));
bl.append((char*)&nfull, sizeof(nfull));
bl.append((char*)&nremote, sizeof(nremote));
bl.append((char*)&nnull, sizeof(nnull));
::_encode(dirv, bl);
::_encode(state, bl);
::_encode(nfull, bl);
::_encode(nremote, bl);
::_encode(nnull, bl);
// fill the bit buffer(s) from the in-memory lists before appending them
_encode_bits();
::_encode(bfull, bl);
::_encode(bremote, bl);
::_encode(bnull, bl);
::_encode(dnbl, bl);
}
// Read this dirlump's header fields and encoded bit buffer(s) out of `bl`
// starting at `off`, advancing `off` past what was consumed. The individual
// dentry bits are left undecoded (dn_decoded is cleared).
// NOTE(review): the bl.copy(...) lines and the ::_decode(...) lines cover
// overlapping fields, and dn_decoded is cleared twice. This looks like the
// removed and added sides of a diff rendered together without +/- markers --
// confirm against the real file.
void _decode(bufferlist& bl, int& off) {
bl.copy(off, sizeof(dirfrag), (char*)&dirfrag); off += sizeof(dirfrag);
bl.copy(off, sizeof(dirv), (char*)&dirv); off += sizeof(dirv);
bl.copy(off, sizeof(state), (char*)&state); off += sizeof(state);
bl.copy(off, sizeof(nfull), (char*)&nfull); off += sizeof(nfull);
bl.copy(off, sizeof(nremote), (char*)&nremote); off += sizeof(nremote);
bl.copy(off, sizeof(nnull), (char*)&nnull); off += sizeof(nnull);
::_decode(bfull, bl, off);
::_decode(bremote, bl, off);
::_decode(bnull, bl, off);
// don't decode bits unless we need them.
dn_decoded = false;
::_decode(dirv, bl, off);
::_decode(state, bl, off);
::_decode(nfull, bl, off);
::_decode(nremote, bl, off);
::_decode(nnull, bl, off);
::_decode(dnbl, bl, off);
dn_decoded = false; // don't decode bits unless we need them.
}
};
@ -226,6 +244,13 @@ class EMetaBlob {
list<metareqid_t> client_reqs;
public:
// Print every dirlump to `out`, visiting them in lump_order.
void print(ostream& out) {
  list<dirfrag_t>::iterator it = lump_order.begin();
  while (it != lump_order.end()) {
    const dirfrag_t& df = *it;
    lump_map[df].print(df, out);
    ++it;
  }
}
void add_client_req(metareqid_t r) {
client_reqs.push_back(r);
@ -332,20 +357,28 @@ class EMetaBlob {
if (dirty) l.mark_dirty();
return l;
}
// Recursively add the chain of ancestors needed to give `dir` context in this
// blob: parent dirs are added first, then the parent dentry (not dirty).
// Recursion stops when the dirfrag is already in lump_map, when the dir's
// inode has no parent dentry, or -- unless asked to go all the way up -- when
// `dir` is an auth subtree root.
// NOTE(review): two signatures (bool toroot / int mode), two subtree-root
// checks, two parent-dn checks and two recursive calls appear below. This
// looks like the removed and added sides of a diff rendered together without
// +/- markers -- confirm which lines are live in the real file.
void add_dir_context(CDir *dir, bool toroot=false) {
// traversal modes accepted by the int-mode signature
static const int TO_AUTH_SUBTREE_ROOT = 0; // default.
static const int TO_ROOT = 1;
void add_dir_context(CDir *dir, int mode = TO_AUTH_SUBTREE_ROOT) {
// already have this dir? (we must always add in order)
if (lump_map.count(dir->dirfrag()))
return;
// stop at subtree root?
if (mode == TO_AUTH_SUBTREE_ROOT &&
dir->is_subtree_root() && dir->is_auth())
return;
// stop at root/stray
CInode *diri = dir->get_inode();
if (!toroot && dir->is_subtree_root() && dir->is_auth())
return; // stop at subtree root
if (!dir->get_inode()->get_parent_dn())
if (!diri->get_parent_dn())
return;
// add parent dn
CDentry *parent = diri->get_parent_dn();
add_dir_context(parent->get_dir(), toroot);
add_dir_context(parent->get_dir(), mode);
add_dentry(parent, false);
}
@ -353,12 +386,13 @@ class EMetaBlob {
// encoding
void _encode(bufferlist& bl) {
int n = lump_map.size();
bl.append((char*)&n, sizeof(n));
int32_t n = lump_map.size();
::_encode(n, bl);
for (list<dirfrag_t>::iterator i = lump_order.begin();
i != lump_order.end();
++i) {
bl.append((char*)&(*i), sizeof(*i));
dirfrag_t dirfrag = *i;
::_encode(dirfrag, bl);
lump_map[*i]._encode(bl);
}
::_encode(atids, bl);
@ -370,13 +404,11 @@ class EMetaBlob {
::_encode(client_reqs, bl);
}
void _decode(bufferlist& bl, int& off) {
int n;
bl.copy(off, sizeof(n), (char*)&n);
off += sizeof(n);
int32_t n;
::_decode(n, bl, off);
for (int i=0; i<n; i++) {
dirfrag_t dirfrag;
bl.copy(off, sizeof(dirfrag), (char*)&dirfrag);
off += sizeof(dirfrag);
dirfrag_t dirfrag;
::_decode(dirfrag, bl, off);
lump_order.push_back(dirfrag);
lump_map[dirfrag]._decode(bl, off);
}

View File

@ -854,6 +854,7 @@ void ESubtreeMap::replay(MDS *mds)
dout(10) << "ESubtreeMap.replay -- reconstructing (auth) subtree spanning tree" << endl;
// first, stick the spanning tree in my cache
//metablob.print(cout);
metablob.replay(mds);
// restore import/export maps
@ -979,11 +980,18 @@ void EImportFinish::expire(MDS *mds, Context *c)
// Replay an EImportFinish journal entry: resolve the ambiguous import for
// `base` by finishing it when `success` is set and cancelling it otherwise.
// The guarded form only does so when the cache still has an ambiguous import
// recorded for `base`; otherwise it logs that the entry is being ignored.
// NOTE(review): both an unguarded dout + if/else and a guarded copy inside
// have_ambiguous_import() appear below. This looks like the removed and added
// sides of a diff rendered together without +/- markers -- confirm which is
// live in the real file.
void EImportFinish::replay(MDS *mds)
{
dout(10) << "EImportFinish.replay " << base << " success=" << success << endl;
if (success)
mds->mdcache->finish_ambiguous_import(base);
else
mds->mdcache->cancel_ambiguous_import(base);
if (mds->mdcache->have_ambiguous_import(base)) {
dout(10) << "EImportFinish.replay " << base << " success=" << success << endl;
if (success)
mds->mdcache->finish_ambiguous_import(base);
else
mds->mdcache->cancel_ambiguous_import(base);
} else {
// no ambiguous import is recorded for base, so there is nothing to resolve
dout(10) << "EImportFinish.replay " << base << " success=" << success
<< ", predates my subtree_map start point, ignoring"
<< endl;
// verify that?
}
}