* lots of work on rejoin. still some details left.

git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1479 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
sageweil 2007-07-05 22:50:14 +00:00
parent c23e3c15ae
commit 628e6548a0
9 changed files with 625 additions and 358 deletions

View File

@ -23,6 +23,9 @@ some smallish projects:
- generalize monitor client?
- throttle message resend attempts
- ENOSPC on client, OSD
code cleanup
- endian portability
@ -49,12 +52,21 @@ sage doc
sage mds
- fix rejoin
- validate dirfrag/dentry/inode connectivity
/ - validate dentry<->inode connectivity
- clean up remove_gather() crap
- add_strong_* should take the cache object
- all replicated scatterlocks should start out in scatter state.
- carefully document rejoin
- cases
- confounding factors
- fix rename.. don't journal on witnesses unless we have to.
- fix unlink.. journal on witnesses if the file is open.
- unlink needs to journal on witnesses (probably), since unlinked inodes may be in those journals
-> hmm, no, rejoin needs to be more robust, and validate namespace changes.
- stray reintegration
- stray purge on shutdown

View File

@ -1547,6 +1547,7 @@ int SyntheticClient::thrash_links(const char *basedir, int dirs, int files, int
break;
case 1:
client->mknod(src.c_str(), 0755);
client->unlink(dst.c_str());
client->link(src.c_str(), dst.c_str());
break;
case 2: client->unlink(src.c_str()); break;

View File

@ -163,6 +163,7 @@ class CInode : public MDSCacheObject {
map<frag_t,CDir*> dirfrags; // cached dir fragments
frag_t pick_dirfrag(const string &dn);
bool has_dirfrags() { return !dirfrags.empty(); }
CDir* get_dirfrag(frag_t fg) {
if (dirfrags.count(fg))
return dirfrags[fg];

View File

@ -410,6 +410,22 @@ void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr)
/**
 * rejoin_set_state - install a new state on a lock and collect its waiters.
 *
 * @lock     the lock to update
 * @s        the state to install
 * @waiters  list handed to take_waiting() for anybody waiting on this
 *           lock (mask SimpleLock::WAIT_ALL)
 *
 * If the lock was not stable on entry, auth_unpin() is also called on the
 * lock's parent after the new state has been set.
 */
void Locker::rejoin_set_state(SimpleLock *lock, int s, list<Context*>& waiters)
{
  // Sample stability before touching the state; set_state() may change
  // what is_stable() reports.
  const bool was_unstable = !lock->is_stable();

  lock->set_state(s);
  if (was_unstable)
    lock->get_parent()->auth_unpin();

  lock->take_waiting(SimpleLock::WAIT_ALL, waiters);
}

View File

@ -85,6 +85,9 @@ protected:
bool wrlock_start(SimpleLock *lock, MDRequest *mdr);
void wrlock_finish(SimpleLock *lock, MDRequest *mdr);
public:
void rejoin_set_state(SimpleLock *lock, int s, list<Context*>& waiters);
// simple
public:
void try_simple_eval(SimpleLock *lock);

File diff suppressed because it is too large Load Diff

View File

@ -346,8 +346,11 @@ protected:
void cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin);
void handle_cache_rejoin(MMDSCacheRejoin *m);
void handle_cache_rejoin_rejoin(MMDSCacheRejoin *m);
void handle_cache_rejoin_weak_rejoin(MMDSCacheRejoin *m);
void handle_cache_rejoin_strong_rejoin(MMDSCacheRejoin *m);
void rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack);
void handle_cache_rejoin_ack(MMDSCacheRejoin *m);
void handle_cache_rejoin_purge(MMDSCacheRejoin *m);
void handle_cache_rejoin_missing(MMDSCacheRejoin *m);
void handle_cache_rejoin_full(MMDSCacheRejoin *m);
void send_cache_rejoin_acks();

View File

@ -72,6 +72,7 @@ public:
static const int WAIT_STABLE = (1<<2); // for a stable state
static const int WAIT_REMOTEXLOCK = (1<<3); // for a remote xlock
static const int WAIT_BITS = 4;
static const int WAIT_ALL = ((1<<WAIT_BITS)-1);
protected:
// parent (what i lock)
@ -123,6 +124,9 @@ public:
// Forward to the parent's finish_waiting(), passing the wait mask shifted
// left by wait_offset together with the result code r.
void finish_waiters(int mask, int r=0) {
  const int parent_mask = mask << wait_offset;
  parent->finish_waiting(parent_mask, r);
}
// Forward to the parent's take_waiting(), passing the wait mask shifted
// left by wait_offset together with the caller's context list.
void take_waiting(int mask, list<Context*>& ls) {
  const int parent_mask = mask << wait_offset;
  parent->take_waiting(parent_mask, ls);
}
// Forward to the parent's add_waiter(), passing the wait mask shifted
// left by wait_offset together with the context c.
void add_waiter(int mask, Context *c) {
  const int parent_mask = mask << wait_offset;
  parent->add_waiter(parent_mask, c);
}

View File

@ -23,13 +23,16 @@
class MMDSCacheRejoin : public Message {
public:
static const int OP_REJOIN = 1; // replica -> auth, i exist. and maybe my lock state.
static const int OP_WEAK = 1; // replica -> auth, i exist, + maybe open files.
static const int OP_STRONG = 2; // replica -> auth, i exist, + open files and lock state.
static const int OP_ACK = 3; // auth -> replica, here is your lock state.
static const int OP_MISSING = 4; // auth -> replica, i am missing these items
static const int OP_FULL = 5; // replica -> auth, here is the full object.
static const int OP_PURGE = 4; // auth -> replica, remove these items, they are old/obsolete.
static const int OP_MISSING = 5; // auth -> replica, i am missing these items
static const int OP_FULL = 6; // replica -> auth, here is the full object.
static const char *get_opname(int op) {
switch (op) {
case OP_REJOIN: return "rejoin";
case OP_WEAK: return "weak";
case OP_STRONG: return "strong";
case OP_ACK: return "ack";
case OP_MISSING: return "missing";
case OP_FULL: return "full";
@ -77,27 +80,39 @@ class MMDSCacheRejoin : public Message {
dirfrag_strong(int n) : nonce(n) {}
};
struct dn_strong {
inodeno_t ino;
inodeno_t remote_ino;
int32_t nonce;
int32_t lock;
dn_strong() {}
dn_strong(int n, int l) : nonce(n), lock(l) {}
dn_strong() : ino(0), remote_ino(0), nonce(0), lock(0) {}
dn_strong(inodeno_t pi, inodeno_t ri, int n, int l) :
ino(pi), remote_ino(ri), nonce(n), lock(l) {}
bool is_primary() { return ino > 0; }
bool is_remote() { return remote_ino > 0; }
bool is_null() { return ino == 0 && remote_ino == 0; }
};
// Weak dentry record: carries the primary and remote inode numbers for a
// dentry. Both fields zero denotes a null dentry (see is_null()).
struct dn_weak {
  inodeno_t ino;         // primary inode number; 0 when not primary
  inodeno_t remote_ino;  // remote inode number; 0 when not remote

  // Default: a null dentry.
  dn_weak() : ino(0), remote_ino(0) {}
  // pi = primary inode number, ri = remote inode number.
  dn_weak(inodeno_t pi, inodeno_t ri) : ino(pi), remote_ino(ri) {}

  // Read-only predicates; const-qualified so they can be called through
  // const references and const iterators.
  bool is_primary() const { return ino > 0; }
  bool is_remote() const { return remote_ino > 0; }
  bool is_null() const { return ino == 0 && remote_ino == 0; }
};
// -- data --
int32_t op;
// weak
map<dirfrag_t, map<string, dn_weak> > weak_dentries;
map<dirfrag_t, map<string, dn_weak> > weak;
set<inodeno_t> weak_inodes;
// strong
map<inodeno_t, inode_strong> strong_inodes;
map<dirfrag_t, dirfrag_strong> strong_dirfrags;
map<dirfrag_t, map<string, dn_strong> > strong_dentries;
map<inodeno_t, inode_strong> strong_inodes;
// full
list<inode_full> full_inodes;
@ -120,6 +135,9 @@ class MMDSCacheRejoin : public Message {
// -- builders --
// inodes
// Record inode number i in the weak_inodes set.
void add_weak_inode(inodeno_t i) {
weak_inodes.insert(i);
}
// Store an inode_strong record for inode i, replacing any existing entry.
// The seven int arguments are passed to the inode_strong constructor
// unchanged and in order.
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl) {
  inode_strong rec(n, cw, a, l, dft, f, dl);
  strong_inodes[i] = rec;
}
@ -135,24 +153,30 @@ class MMDSCacheRejoin : public Message {
// dirfrags
void add_weak_dirfrag(dirfrag_t df) {
weak_dentires[df];
weak[df];
}
void add_weak_dirfrag(dirfrag_t df, map<string,dn_weak>& dnmap) {
weak[df] = dnmap;
}
// Store a dirfrag_strong record built from n for dirfrag df, replacing any
// existing entry.
void add_strong_dirfrag(dirfrag_t df, int n) {
  dirfrag_strong rec(n);
  strong_dirfrags[df] = rec;
}
// dentries
// Store a copy of dnw as the weak record for dentry dname in dirfrag df,
// replacing any existing entry. dnw is only read, so it is taken by const
// reference.
void add_weak_dentry(dirfrag_t df, const string& dname, const dn_weak& dnw) {
  weak[df][dname] = dnw;
}
void add_weak_null_dentry(dirfrag_t df, const string& dname) {
weak_dentries[df][dname] = dn_weak(0, 0);
weak[df][dname] = dn_weak(0, 0);
}
void add_weak_primary_dentry(dirfrag_t df, const string& dname, inodeno_t ino) {
weak_dentries[df][dname] = dn_weak(ino, 0);
weak[df][dname] = dn_weak(ino, 0);
}
void add_weak_remote_dentry(dirfrag_t df, const string& dname, inodeno_t ino) {
weak_dentries[df][dname] = dn_weak(0, ino);
weak[df][dname] = dn_weak(0, ino);
}
void add_strong_dentry(dirfrag_t df, const string& dname, int n, int ls) {
strong_dentries[df][dname] = dn_strong(n, ls);
void add_strong_dentry(dirfrag_t df, const string& dname, inodeno_t pi, inodeno_t ri, int n, int ls) {
strong_dentries[df][dname] = dn_strong(pi, ri, n, ls);
}
void add_dentry_authpin(dirfrag_t df, const string& dname, const metareqid_t& ri) {
authpinned_dentries[df][dname] = ri;
@ -174,7 +198,8 @@ class MMDSCacheRejoin : public Message {
::_encode(authpinned_inodes, payload);
::_encode(xlocked_inodes, payload);
::_encode(strong_dirfrags, payload);
::_encode(weak_dentries, payload);
::_encode(weak, payload);
::_encode(weak_inodes, payload);
::_encode(strong_dentries, payload);
::_encode(authpinned_dentries, payload);
::_encode(xlocked_dentries, payload);
@ -192,7 +217,8 @@ class MMDSCacheRejoin : public Message {
::_decode(authpinned_inodes, payload, off);
::_decode(xlocked_inodes, payload, off);
::_decode(strong_dirfrags, payload, off);
::_decode(weak_dentries, payload, off);
::_decode(weak, payload, off);
::_decode(weak_inodes, payload, off);
::_decode(strong_dentries, payload, off);
::_decode(authpinned_dentries, payload, off);
::_decode(xlocked_dentries, payload, off);