mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
* lots of work on rejoin. still some details left.
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1479 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
parent
c23e3c15ae
commit
628e6548a0
@ -23,6 +23,9 @@ some smallish projects:
|
||||
- generalize monitor client?
|
||||
- throttle message resend attempts
|
||||
|
||||
- ENOSPC on client, OSD
|
||||
|
||||
|
||||
|
||||
code cleanup
|
||||
- endian portability
|
||||
@ -49,12 +52,21 @@ sage doc
|
||||
sage mds
|
||||
|
||||
- fix rejoin
|
||||
- validate dirfrag/dentry/inode connectivity
|
||||
/ - validate dentry<->inode connectivity
|
||||
- clean up remove_gather() crap
|
||||
- add_strong_* should take the cache object
|
||||
- all replicated scatterlocks should start out in scatter state.
|
||||
- carefully document rejoin
|
||||
- cases
|
||||
- confounding factors
|
||||
|
||||
|
||||
- fix rename.. don't journal on witnesses unless we have to.
|
||||
- fix unlink.. journal on witnesses if the file is open.
|
||||
|
||||
- unlink needs to journal on witnesses (probably), since unlinked inodes may be in those journals
|
||||
-> hmm, no, rejoin needs to be more robust, and validate namespace changes.
|
||||
|
||||
|
||||
- stray reintegration
|
||||
- stray purge on shutdown
|
||||
|
@ -1547,6 +1547,7 @@ int SyntheticClient::thrash_links(const char *basedir, int dirs, int files, int
|
||||
break;
|
||||
case 1:
|
||||
client->mknod(src.c_str(), 0755);
|
||||
client->unlink(dst.c_str());
|
||||
client->link(src.c_str(), dst.c_str());
|
||||
break;
|
||||
case 2: client->unlink(src.c_str()); break;
|
||||
|
@ -163,6 +163,7 @@ class CInode : public MDSCacheObject {
|
||||
map<frag_t,CDir*> dirfrags; // cached dir fragments
|
||||
|
||||
frag_t pick_dirfrag(const string &dn);
|
||||
bool has_dirfrags() { return !dirfrags.empty(); }
|
||||
CDir* get_dirfrag(frag_t fg) {
|
||||
if (dirfrags.count(fg))
|
||||
return dirfrags[fg];
|
||||
|
@ -410,6 +410,22 @@ void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
|
||||
|
||||
|
||||
/** rejoin_set_state
|
||||
* @lock the lock
|
||||
* @s the new state
|
||||
* @waiters list for anybody waiting on this lock
|
||||
*/
|
||||
void Locker::rejoin_set_state(SimpleLock *lock, int s, list<Context*>& waiters)
|
||||
{
|
||||
if (!lock->is_stable()) {
|
||||
lock->set_state(s);
|
||||
lock->get_parent()->auth_unpin();
|
||||
} else {
|
||||
lock->set_state(s);
|
||||
}
|
||||
lock->take_waiting(SimpleLock::WAIT_ALL, waiters);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -85,6 +85,9 @@ protected:
|
||||
bool wrlock_start(SimpleLock *lock, MDRequest *mdr);
|
||||
void wrlock_finish(SimpleLock *lock, MDRequest *mdr);
|
||||
|
||||
public:
|
||||
void rejoin_set_state(SimpleLock *lock, int s, list<Context*>& waiters);
|
||||
|
||||
// simple
|
||||
public:
|
||||
void try_simple_eval(SimpleLock *lock);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -346,8 +346,11 @@ protected:
|
||||
|
||||
void cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin);
|
||||
void handle_cache_rejoin(MMDSCacheRejoin *m);
|
||||
void handle_cache_rejoin_rejoin(MMDSCacheRejoin *m);
|
||||
void handle_cache_rejoin_weak_rejoin(MMDSCacheRejoin *m);
|
||||
void handle_cache_rejoin_strong_rejoin(MMDSCacheRejoin *m);
|
||||
void rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack);
|
||||
void handle_cache_rejoin_ack(MMDSCacheRejoin *m);
|
||||
void handle_cache_rejoin_purge(MMDSCacheRejoin *m);
|
||||
void handle_cache_rejoin_missing(MMDSCacheRejoin *m);
|
||||
void handle_cache_rejoin_full(MMDSCacheRejoin *m);
|
||||
void send_cache_rejoin_acks();
|
||||
|
@ -72,6 +72,7 @@ public:
|
||||
static const int WAIT_STABLE = (1<<2); // for a stable state
|
||||
static const int WAIT_REMOTEXLOCK = (1<<3); // for a remote xlock
|
||||
static const int WAIT_BITS = 4;
|
||||
static const int WAIT_ALL = ((1<<WAIT_BITS)-1);
|
||||
|
||||
protected:
|
||||
// parent (what i lock)
|
||||
@ -123,6 +124,9 @@ public:
|
||||
void finish_waiters(int mask, int r=0) {
|
||||
parent->finish_waiting(mask << wait_offset, r);
|
||||
}
|
||||
void take_waiting(int mask, list<Context*>& ls) {
|
||||
parent->take_waiting(mask << wait_offset, ls);
|
||||
}
|
||||
void add_waiter(int mask, Context *c) {
|
||||
parent->add_waiter(mask << wait_offset, c);
|
||||
}
|
||||
|
@ -23,13 +23,16 @@
|
||||
|
||||
class MMDSCacheRejoin : public Message {
|
||||
public:
|
||||
static const int OP_REJOIN = 1; // replica -> auth, i exist. and maybe my lock state.
|
||||
static const int OP_WEAK = 1; // replica -> auth, i exist, + maybe open files.
|
||||
static const int OP_STRONG = 2; // replica -> auth, i exist, + open files and lock state.
|
||||
static const int OP_ACK = 3; // auth -> replica, here is your lock state.
|
||||
static const int OP_MISSING = 4; // auth -> replica, i am missing these items
|
||||
static const int OP_FULL = 5; // replica -> auth, here is the full object.
|
||||
static const int OP_PURGE = 4; // auth -> replica, remove these items, they are old/obsolete.
|
||||
static const int OP_MISSING = 5; // auth -> replica, i am missing these items
|
||||
static const int OP_FULL = 6; // replica -> auth, here is the full object.
|
||||
static const char *get_opname(int op) {
|
||||
switch (op) {
|
||||
case OP_REJOIN: return "rejoin";
|
||||
case OP_WEAK: return "weak";
|
||||
case OP_STRONG: return "strong";
|
||||
case OP_ACK: return "ack";
|
||||
case OP_MISSING: return "missing";
|
||||
case OP_FULL: return "full";
|
||||
@ -77,27 +80,39 @@ class MMDSCacheRejoin : public Message {
|
||||
dirfrag_strong(int n) : nonce(n) {}
|
||||
};
|
||||
struct dn_strong {
|
||||
inodeno_t ino;
|
||||
inodeno_t remote_ino;
|
||||
int32_t nonce;
|
||||
int32_t lock;
|
||||
dn_strong() {}
|
||||
dn_strong(int n, int l) : nonce(n), lock(l) {}
|
||||
dn_strong() : ino(0), remote_ino(0), nonce(0), lock(0) {}
|
||||
dn_strong(inodeno_t pi, inodeno_t ri, int n, int l) :
|
||||
ino(pi), remote_ino(ri), nonce(n), lock(l) {}
|
||||
bool is_primary() { return ino > 0; }
|
||||
bool is_remote() { return remote_ino > 0; }
|
||||
bool is_null() { return ino == 0 && remote_ino == 0; }
|
||||
};
|
||||
|
||||
struct dn_weak {
|
||||
inodeno_t ino;
|
||||
inodeno_t remote_ino;
|
||||
dn_weak() : ino(0), remote_ino(0) {}
|
||||
dn_weak(inodeno_t pi, inodeno_t ri) : ino(pi), remote_ino(ri) {}
|
||||
bool is_primary() { return ino > 0; }
|
||||
bool is_remote() { return remote_ino > 0; }
|
||||
bool is_null() { return ino == 0 && remote_ino == 0; }
|
||||
};
|
||||
|
||||
// -- data --
|
||||
int32_t op;
|
||||
|
||||
// weak
|
||||
map<dirfrag_t, map<string, dn_weak> > weak_dentries;
|
||||
map<dirfrag_t, map<string, dn_weak> > weak;
|
||||
set<inodeno_t> weak_inodes;
|
||||
|
||||
// strong
|
||||
map<inodeno_t, inode_strong> strong_inodes;
|
||||
map<dirfrag_t, dirfrag_strong> strong_dirfrags;
|
||||
map<dirfrag_t, map<string, dn_strong> > strong_dentries;
|
||||
map<inodeno_t, inode_strong> strong_inodes;
|
||||
|
||||
// full
|
||||
list<inode_full> full_inodes;
|
||||
@ -120,6 +135,9 @@ class MMDSCacheRejoin : public Message {
|
||||
|
||||
// -- builders --
|
||||
// inodes
|
||||
void add_weak_inode(inodeno_t i) {
|
||||
weak_inodes.insert(i);
|
||||
}
|
||||
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl) {
|
||||
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f, dl);
|
||||
}
|
||||
@ -135,24 +153,30 @@ class MMDSCacheRejoin : public Message {
|
||||
|
||||
// dirfrags
|
||||
void add_weak_dirfrag(dirfrag_t df) {
|
||||
weak_dentires[df];
|
||||
weak[df];
|
||||
}
|
||||
void add_weak_dirfrag(dirfrag_t df, map<string,dn_weak>& dnmap) {
|
||||
weak[df] = dnmap;
|
||||
}
|
||||
void add_strong_dirfrag(dirfrag_t df, int n) {
|
||||
strong_dirfrags[df] = dirfrag_strong(n);
|
||||
}
|
||||
|
||||
// dentries
|
||||
void add_weak_dentry(dirfrag_t df, const string& dname, dn_weak& dnw) {
|
||||
weak[df][dname] = dnw;
|
||||
}
|
||||
void add_weak_null_dentry(dirfrag_t df, const string& dname) {
|
||||
weak_dentries[df][dname] = dn_weak(0, 0);
|
||||
weak[df][dname] = dn_weak(0, 0);
|
||||
}
|
||||
void add_weak_primary_dentry(dirfrag_t df, const string& dname, inodeno_t ino) {
|
||||
weak_dentries[df][dname] = dn_weak(ino, 0);
|
||||
weak[df][dname] = dn_weak(ino, 0);
|
||||
}
|
||||
void add_weak_remote_dentry(dirfrag_t df, const string& dname, inodeno_t ino) {
|
||||
weak_dentries[df][dname] = dn_weak(0, ino);
|
||||
weak[df][dname] = dn_weak(0, ino);
|
||||
}
|
||||
void add_strong_dentry(dirfrag_t df, const string& dname, int n, int ls) {
|
||||
strong_dentries[df][dname] = dn_strong(n, ls);
|
||||
void add_strong_dentry(dirfrag_t df, const string& dname, inodeno_t pi, inodeno_t ri, int n, int ls) {
|
||||
strong_dentries[df][dname] = dn_strong(pi, ri, n, ls);
|
||||
}
|
||||
void add_dentry_authpin(dirfrag_t df, const string& dname, const metareqid_t& ri) {
|
||||
authpinned_dentries[df][dname] = ri;
|
||||
@ -174,7 +198,8 @@ class MMDSCacheRejoin : public Message {
|
||||
::_encode(authpinned_inodes, payload);
|
||||
::_encode(xlocked_inodes, payload);
|
||||
::_encode(strong_dirfrags, payload);
|
||||
::_encode(weak_dentries, payload);
|
||||
::_encode(weak, payload);
|
||||
::_encode(weak_inodes, payload);
|
||||
::_encode(strong_dentries, payload);
|
||||
::_encode(authpinned_dentries, payload);
|
||||
::_encode(xlocked_dentries, payload);
|
||||
@ -192,7 +217,8 @@ class MMDSCacheRejoin : public Message {
|
||||
::_decode(authpinned_inodes, payload, off);
|
||||
::_decode(xlocked_inodes, payload, off);
|
||||
::_decode(strong_dirfrags, payload, off);
|
||||
::_decode(weak_dentries, payload, off);
|
||||
::_decode(weak, payload, off);
|
||||
::_decode(weak_inodes, payload, off);
|
||||
::_decode(strong_dentries, payload, off);
|
||||
::_decode(authpinned_dentries, payload, off);
|
||||
::_decode(xlocked_dentries, payload, off);
|
||||
|
Loading…
Reference in New Issue
Block a user